feat: deprecate sdk as dependency (#2149)

47477909 · Biswa Panda · GitHub · 095ea3e7 · 47477909 · 47477909
Unverified Commit 47477909 authored Jul 28, 2025 by Biswa Panda Committed by GitHub Jul 29, 2025
20 changed files
--- a/.devcontainer/post-create.sh
+++ b/.devcontainer/post-create.sh
@@ -52,10 +52,6 @@ export CARGO_TARGET_DIR=$HOME/dynamo/.build/target
 cargo build --locked --profile dev --features mistralrs
 cargo doc --no-deps
-# create symlinks for the binaries in the deploy directory
-mkdir -p $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin
-ln -sf $HOME/dynamo/.build/target/debug/dynamo-run $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
 # install the python bindings
 cd $HOME/dynamo/lib/bindings/python && retry maturin develop

--- a/.gitignore
+++ b/.gitignore
@@ -89,6 +89,4 @@ generated-values.yaml
 TensorRT-LLM
 # Local build artifacts for devcontainer
 .build/
-# Copied binaries to ignore
\ No newline at end of file
-deploy/sdk/src/dynamo/sdk/cli/bin
--- a/Earthfile
+++ b/Earthfile
@@ -115,14 +115,6 @@ dynamo-build:
    RUN cargo build --release --locked --features llamacpp,cuda && \
        cargo doc --no-deps
-    # Create symlinks for wheel building
-    RUN mkdir -p /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/ && \
-        # Remove existing symlinks
-        rm -f /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/* && \
-        # Create new symlinks pointing to the correct location
-        ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
    RUN cd /workspace/lib/bindings/python && \
        uv build --wheel --out-dir /workspace/dist --python 3.12
    RUN cd /workspace && \

--- a/components/planner/src/dynamo/planner/__init__.py
+++ b/components/planner/src/dynamo/planner/__init__.py
@@ -19,10 +19,12 @@ __all__ = [
    "KubernetesConnector",
    "LoadPlannerDefaults",
    "SLAPlannerDefaults",
+    "ServiceConfig",
 ]
 # Import the classes
 from dynamo.planner.circusd import CircusController
+from dynamo.planner.config import ServiceConfig
 from dynamo.planner.defaults import LoadPlannerDefaults, SLAPlannerDefaults
 from dynamo.planner.kubernetes_connector import KubernetesConnector
 from dynamo.planner.planner_connector import PlannerConnector
--- a/deploy/sdk/src/dynamo/sdk/lib/config.py
+++ b/deploy/sdk/src/dynamo/sdk/lib/config.py
--- a/components/planner/src/dynamo/planner/prometheus.py
+++ b/components/planner/src/dynamo/planner/prometheus.py
@@ -20,9 +20,9 @@ import tempfile
 import yaml
+from dynamo.planner.config import ServiceConfig
 from dynamo.planner.defaults import SLAPlannerDefaults
 from dynamo.runtime import DistributedRuntime, dynamo_worker
-from dynamo.sdk.lib.config import ServiceConfig
 logger = logging.getLogger(__name__)

--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -258,7 +258,7 @@ COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
 USER $USERNAME
 ENV HOME=/home/$USERNAME
-ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH
+ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH
 WORKDIR $HOME
 # https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
@@ -324,7 +324,6 @@ COPY rust-toolchain.toml /workspace/
 COPY lib/ /workspace/lib/
 COPY components /workspace/components
 COPY launch /workspace/launch
-COPY deploy/sdk /workspace/deploy/sdk
 RUN cargo build \
 	--release \
@@ -375,7 +374,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc
-ENV PYTHONPATH=/workspace/dynamo/deploy/sdk/src:/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH
+ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH
 ########################################
 ########## Development Image ###########

--- a/container/Dockerfile.sglang-wideep
+++ b/container/Dockerfile.sglang-wideep
@@ -122,8 +122,6 @@ ARG CARGO_BUILD_JOBS
 ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
 RUN cargo build --release
-RUN mkdir -p deploy/sdk/src/dynamo/sdk/cli/bin
-RUN cp target/release/dynamo-run deploy/sdk/src/dynamo/sdk/cli/bin
 RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../..
 RUN pip install --break-system-packages -e .

--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.tensorrt_llm
@@ -278,7 +278,6 @@ COPY rust-toolchain.toml /workspace/
 COPY lib/ /workspace/lib/
 COPY components /workspace/components
 COPY launch /workspace/launch
-COPY deploy/sdk /workspace/deploy/sdk
 RUN cargo build \
 	--release \

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -287,7 +287,7 @@ RUN uv pip install maturin[patchelf]
 USER $USERNAME
 ENV HOME=/home/$USERNAME
-ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src:$PYTHONPATH:$HOME/dynamo/components/planner/src:$PYTHONPATH
+ENV PYTHONPATH=$HOME/dynamo/components/planner/src:$PYTHONPATH
 ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
 WORKDIR $HOME
@@ -354,7 +354,6 @@ COPY rust-toolchain.toml /workspace/
 COPY lib/ /workspace/lib/
 COPY components /workspace/components
 COPY launch /workspace/launch
-COPY deploy/sdk /workspace/deploy/sdk
 RUN cargo build \
 	--release \

--- a/deploy/CONTRIBUTING.md
+++ b/deploy/CONTRIBUTING.md
@@ -121,14 +121,6 @@ go test ./... -v
 go test -race ./...
 ```
-**Python Tests (SDK):**
-```bash
-cd deploy/sdk
-pytest tests/ -v
-pytest tests/ --cov=dynamo.sdk
-```
 ### Integration Tests
 **End-to-End Deployment Tests:**

--- a/deploy/sdk/README.md
+++ b/deploy/sdk/README.md
-# Dynamo SDK
-Dynamo is a python based SDK for building and deploying distributed inference applications. Dynamo leverages concepts from open source projects like [BentoML](https://github.com/bentoml/bentoml) to provide a developer friendly experience to go from local development to K8s deployment.
-## Installation
-```bash
-pip install ai-dynamo
-```
-## Quickstart
-Lets build a simple distributed pipeline with 3 components: `Frontend`, `Middle` and `Backend`. The structure of the pipeline looks like this:
-```
-Users/Clients (HTTP)
-      │
-      ▼
-┌─────────────┐
-│  Frontend   │  HTTP API endpoint (/generate)
-└─────────────┘
-      │
-      ▼
-┌─────────────┐
-│   Middle    │
-└─────────────┘
-      │
-      ▼
-┌─────────────┐
-│  Backend    │
-└─────────────┘
-```
-The code for the pipeline looks like this:
-```python
-# filename: pipeline.py
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-from dynamo.sdk import DYNAMO_IMAGE, depends, endpoint, service, api
-class RequestType(BaseModel):
-    text: str
-class ResponseType(BaseModel):
-    text: str
-@service(
-    dynamo={"namespace": "inference"},
-)
-class Backend:
-    @endpoint()
-    async def generate(self, req: RequestType):
-        text = f"{req.text}-back"
-        for token in text.split():
-            yield f"Backend: {token}"
-@service(
-    dynamo={"namespace": "inference"},
-)
-class Middle:
-    backend = depends(Backend)
-    @endpoint()
-    async def generate(self, req: RequestType):
-        text = f"{req.text}-mid"
-        next_request = RequestType(text=text).model_dump_json()
-        async for response in self.backend.generate(next_request):
-            yield f"Middle: {response}"
-app = FastAPI(title="Hello World!")
-@service(
-    dynamo={"namespace": "inference"},
-    app=app,
-)
-class Frontend:
-    middle = depends(Middle)
-    @api()
-    async def generate(self, request: RequestType):
-        async def content_generator():
-            async for response in self.middle.generate(request.model_dump_json()):
-                yield f"Frontend: {response}"
-        return StreamingResponse(content_generator())
-```
-You can run this pipeline locally by spinning up ETCD and NATS and then running the pipeline:
-```bash
-# Spin up ETCD and NATS
-docker compose -f deploy/docker-compose.yml up -d
-```
-then
-```bash
-# Run the pipeline
-dynamo serve pipeline:Frontend
-```
-Once it's up and running, you can make a request to the pipeline using
-```bash
-curl -X POST http://localhost:8080/generate \
-    -H "Content-Type: application/json" \
-    -d '{"text": "federer"}'
-```
-You should see the following output
-```bash
-federer-mid-back
-```
-You can find in-depth documentation for the Dynamo SDK [here](./docs/sdk/README.md) and the Dynamo CLI [here](./docs/cli/README.md)
-Please refer to [hello_world](../../../examples/hello_world/README.md) and [llm](../../../examples/llm/README.md) for examples.
--- a/deploy/sdk/docs/cli/README.md
+++ b/deploy/sdk/docs/cli/README.md
-../../../../docs/guides/cli_overview.md
\ No newline at end of file
--- a/deploy/sdk/docs/sdk/README.md
+++ b/deploy/sdk/docs/sdk/README.md
-../../../../docs/API/sdk.md
\ No newline at end of file
--- a/deploy/sdk/src/dynamo/sdk/__init__.py
+++ b/deploy/sdk/src/dynamo/sdk/__init__.py
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-import warnings
-from typing import Any
-warnings.filterwarnings("ignore", category=UserWarning, message=".*pkg_resources.*")
-# flake8: noqa: E402
-from dynamo.sdk.core.decorators.endpoint import abstract_endpoint, api, endpoint
-from dynamo.sdk.core.lib import DYNAMO_IMAGE, depends, liveness, readiness, service
-from dynamo.sdk.core.protocol.interface import AbstractService
-from dynamo.sdk.lib.decorators import async_on_start, on_shutdown
-from dynamo.sdk.lib.utils import get_capi_library_path
-dynamo_context: dict[str, Any] = {}
-__all__ = [
-    "DYNAMO_IMAGE",
-    "on_shutdown",
-    "async_on_start",
-    "depends",
-    "dynamo_context",
-    "endpoint",
-    "api",
-    "service",
-    "AbstractService",
-    "abstract_endpoint",
-    "liveness",
-    "readiness",
-    "get_capi_library_path",
-]
--- a/deploy/sdk/src/dynamo/sdk/cli/Dockerfile.template
+++ b/deploy/sdk/src/dynamo/sdk/cli/Dockerfile.template
-# Use ARG to allow base image to be specified at build time
-ARG BASE_IMAGE=__BASE_IMAGE__
-FROM ${BASE_IMAGE}
-# Build arguments for user configuration
-ARG USER_ID=1024
-ARG GROUP_ID=1024
-ARG USERNAME=dynamo
-ARG GROUPNAME=dynamo
-ARG HOME_DIR=/home/${USERNAME}
-# Set environment variables
-ENV PYTHONUNBUFFERED=1
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV PATH="${HOME_DIR}/.local/bin:$PATH"
-ENV PYTHONPATH="${HOME_DIR}/app:$PYTHONPATH"
-# Create group and user
-RUN if [ "$(id -u)" != "0" ]; then \
-        echo "Using sudo for user/group creation"; \
-        sudo groupadd --gid ${GROUP_ID} ${GROUPNAME} \
-        && sudo useradd --uid ${USER_ID} --gid ${GROUP_ID} --create-home --shell /bin/bash ${USERNAME} \
-        && sudo mkdir -p ${HOME_DIR}/app \
-        && sudo mkdir -p ${HOME_DIR}/.local/bin \
-        && sudo mkdir -p ${HOME_DIR}/.cache/pip \
-        && sudo chown -R ${USERNAME}:${GROUPNAME} ${HOME_DIR}; \
-    else \
-        echo "Running as root, no sudo needed"; \
-        groupadd --gid ${GROUP_ID} ${GROUPNAME} \
-        && useradd --uid ${USER_ID} --gid ${GROUP_ID} --create-home --shell /bin/bash ${USERNAME} \
-        && mkdir -p ${HOME_DIR}/app \
-        && mkdir -p ${HOME_DIR}/.local/bin \
-        && mkdir -p ${HOME_DIR}/.cache/pip \
-        && chown -R ${USERNAME}:${GROUPNAME} ${HOME_DIR}; \
-    fi
-# Switch to non-root user
-USER ${USERNAME}
-WORKDIR ${HOME_DIR}/app
-# Copy application code
-COPY --chown=${USERNAME}:${GROUPNAME} . .
-RUN chmod +x ${HOME_DIR}/app
--- a/deploy/sdk/src/dynamo/sdk/cli/allocator.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/allocator.py
-#  SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#  Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
-from __future__ import annotations
-import logging
-import os
-from typing import Any
-from dynamo.sdk.core.protocol.interface import ServiceInterface
-# Import our own resource module
-from dynamo.sdk.lib.resource import (
-    NVIDIA_GPU,
-    GPUManager,
-    ResourceError,
-    system_resources,
-)
-logger = logging.getLogger(__name__)
-# Constants
-DYN_DISABLE_AUTO_GPU_ALLOCATION = "DYN_DISABLE_AUTO_GPU_ALLOCATION"
-DYN_DEPLOYMENT_ENV = "DYN_DEPLOYMENT_ENV"
-logger = logging.getLogger(__name__)
-def format_memory_gb(memory_bytes: float) -> str:
-    """Convert memory from bytes to formatted GB string.
-    Args:
-        memory_bytes: Memory size in bytes
-    Returns:
-        Formatted string with memory size in GB with 1 decimal place
-    """
-    return f"{memory_bytes/1024/1024/1024:.1f}GB"
-class ResourceAllocator:
-    def __init__(self) -> None:
-        """Initialize the resource allocator."""
-        self.system_resources = system_resources()
-        self.gpu_manager = GPUManager()
-        self.remaining_gpus = len(self.system_resources[NVIDIA_GPU])
-        # For compatibility with the old implementation
-        self._available_gpus: list[tuple[float, float]] = [
-            (1.0, 1.0)  # each item is (remaining, unit)
-            for _ in range(self.remaining_gpus)
-        ]
-        self._service_gpu_allocations: dict[str, list[int]] = {}
-        logger.debug(
-            f"ResourceAllocator initialized with {self.remaining_gpus} GPUs available"
-        )
-    def assign_gpus(self, count: float, service_name: str = "") -> list[int]:
-        """
-        Assign GPUs for use.
-        Args:
-            count: Number of GPUs to assign (can be fractional)
-        Returns:
-            List of GPU indices that were assigned
-        """
-        if count > self.remaining_gpus:
-            logger.warning(
-                f"Requested {count} GPUs, but only {self.remaining_gpus} are remaining. "
-                f"Serving may fail due to inadequate GPUs. Set {DYN_DISABLE_AUTO_GPU_ALLOCATION}=1 "
-                "to disable automatic allocation and allocate GPUs manually."
-            )
-        self.remaining_gpus = int(max(0, self.remaining_gpus - count))
-        assigned = []  # Will store assigned GPU indices
-        if count < 1:  # a fractional GPU
-            try:
-                # try to find the GPU used with the same fragment
-                gpu = next(
-                    i
-                    for i, v in enumerate(self._available_gpus)
-                    if v[0] > 0 and v[1] == count
-                )
-            except StopIteration:
-                try:
-                    gpu = next(
-                        i for i, v in enumerate(self._available_gpus) if v[0] == 1.0
-                    )
-                except StopIteration:
-                    gpu = len(self._available_gpus)
-                    self._available_gpus.append((1.0, count))
-            remaining, _ = self._available_gpus[gpu]
-            if (remaining := remaining - count) < count:
-                # can't assign to the next one, mark it as zero.
-                self._available_gpus[gpu] = (0.0, count)
-            else:
-                self._available_gpus[gpu] = (remaining, count)
-            assigned = [gpu]
-        else:  # allocate n GPUs, n is a positive integer
-            if int(count) != count:
-                raise ResourceError("Float GPUs larger than 1 is not supported")
-            count = int(count)
-            unassigned = [
-                gpu
-                for gpu, value in enumerate(self._available_gpus)
-                if value[0] > 0 and value[1] == 1.0
-            ]
-            if len(unassigned) < count:
-                logger.warning(f"Not enough GPUs to be assigned, {count} is requested")
-                for _ in range(count - len(unassigned)):
-                    unassigned.append(len(self._available_gpus))
-                    self._available_gpus.append((1.0, 1.0))
-            for gpu in unassigned[:count]:
-                self._available_gpus[gpu] = (0.0, 1.0)
-            assigned = unassigned[:count]
-        # Store the allocation if service_name is provided
-        if service_name and assigned:
-            if service_name in self._service_gpu_allocations:
-                self._service_gpu_allocations[service_name].extend(assigned)
-                logger.debug(
-                    f"Additional GPUs {assigned} allocated to service '{service_name}', "
-                    f"total GPUs: {self._service_gpu_allocations[service_name]}"
-                )
-            else:
-                self._service_gpu_allocations[service_name] = assigned
-                logger.debug(f"GPUs {assigned} allocated to service '{service_name}'")
-        elif assigned:
-            logger.debug(f"GPUs {assigned} allocated without service name tracking")
-        return assigned
-    def get_gpu_stats(self) -> list[dict[str, Any]]:
-        """Get detailed statistics for all GPUs."""
-        return self.gpu_manager.get_gpu_stats()
-    def get_resource_envs(
-        self,
-        service: ServiceInterface[Any],
-    ) -> tuple[int, list[dict[str, str]]]:
-        """
-        Get resource environment variables for a service.
-        Args:
-            service: The service to get resource environment variables for
-        Returns:
-            Tuple of (number of workers, list of environment variables dictionaries)
-        """
-        logger.info(f"Getting resource envs for service {service.name}")
-        services = service.get_service_configs()
-        if service.name not in services:
-            logger.warning(f"No service configs found for {service.name}")
-            return 1, []  # Default to 1 worker, no special resources
-        config = services[service.name]
-        logger.debug(f"Using config for {service.name}: {config}")
-        num_gpus = 0
-        num_workers = 1
-        resource_envs: list[dict[str, str]] = []
-        # Check if service requires GPUs
-        if "gpu" in (config.get("resources") or {}):
-            num_gpus = int(config["resources"]["gpu"])  # type: ignore
-            logger.info(f"GPU requirement found: {num_gpus}")
-            # Check if we have enough GPUs
-            available_gpus = self.gpu_manager.get_available_gpus()
-            if num_gpus > len(available_gpus):
-                logger.warning(
-                    f"Requested {num_gpus} GPUs, but only {len(available_gpus)} are available. "
-                    f"Service may fail due to inadequate GPU resources."
-                )
-        # Determine number of workers
-        if config.get("workers"):
-            num_workers = config["workers"]
-            logger.info(f"Using configured worker count: {num_workers}")
-        # Handle GPU allocation
-        if num_gpus and DYN_DISABLE_AUTO_GPU_ALLOCATION not in os.environ:
-            logger.info("GPU allocation enabled")
-            if os.environ.get(DYN_DEPLOYMENT_ENV):
-                logger.info("K8s deployment detected")
-                # K8s replicas: Assumes DYNAMO_DEPLOYMENT_ENV is set
-                # each pod in replicaset will have separate GPU with same CUDA_VISIBLE_DEVICES
-                assigned = self.assign_gpus(num_gpus, service.name)
-                logger.info(f"Assigned GPUs for K8s: {assigned}")
-                # Generate environment variables for each worker
-                for _ in range(num_workers):
-                    env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
-                    resource_envs.append(env_vars)
-            else:
-                logger.info(
-                    f"Local deployment detected. Allocating GPUs for {num_workers} workers of '{service.name}'"
-                )
-                # Local deployment where we split all available GPUs across workers
-                for worker_id in range(num_workers):
-                    assigned = self.assign_gpus(num_gpus, service.name)
-                    logger.debug(
-                        f"Worker {worker_id} of '{service.name}' assigned GPUs: {assigned}"
-                    )
-                    # Generate environment variables for this worker
-                    env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
-                    # If we have comprehensive GPU stats, log them
-                    try:
-                        gpu_stats = [
-                            stat
-                            for stat in self.get_gpu_stats()
-                            if stat["index"] in assigned
-                        ]
-                        for stat in gpu_stats:
-                            logger.info(
-                                f"GPU {stat['index']} ({stat['name']}): "
-                                f"Memory: {format_memory_gb(stat['free_memory'])} free / "
-                                f"{format_memory_gb(stat['total_memory'])} total, "
-                                f"Utilization: {stat['gpu_utilization']}% "
-                            )
-                    except Exception as e:
-                        logger.debug(f"Failed to get GPU stats: {e}")
-                    resource_envs.append(env_vars)
-        logger.info(
-            f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}"
-        )
-        return num_workers, resource_envs
-    def reset_allocations(self):
-        """Reset all GPU allocations."""
-        self.gpu_manager.reset_allocations()
-        # Reset legacy tracking
-        self._available_gpus = [(1.0, 1.0) for _ in range(self.remaining_gpus)]
--- a/deploy/sdk/src/dynamo/sdk/cli/circus.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/circus.py
-#  SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#  Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
-# Once planner v1 goes live - this will be be full of more granular APIs
-from __future__ import annotations
-import contextlib
-import os
-import pathlib
-import shlex
-import sys
-from dataclasses import dataclass
-from typing import Any, Callable
-import psutil
-from circus.arbiter import Arbiter as _Arbiter
-from circus.sockets import CircusSocket
-from circus.watcher import Watcher
-from .utils import ServiceProtocol, reserve_free_port
-class Arbiter(_Arbiter):
-    """Arbiter with cleanup support via exit_stack."""
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-        self.exit_stack = contextlib.ExitStack()
-    def start(self, cb: Callable[[Any], Any] | None = None) -> None:
-        """Start arbiter and enter context."""
-        self.exit_stack.__enter__()
-        fut = super().start(cb)
-        if exc := fut.exception():
-            raise exc
-    def stop(self) -> None:
-        """Stop arbiter and cleanup resources."""
-        self.exit_stack.__exit__(None, None, None)
-        return super().stop()
-@dataclass
-class CircusRunner:
-    """Simple server wrapper for arbiter lifecycle management."""
-    arbiter: Arbiter
-    def stop(self) -> None:
-        self.arbiter.stop()
-    @property
-    def running(self) -> bool:
-        return self.arbiter.running
-    def __enter__(self) -> CircusRunner:
-        return self
-    def __exit__(self, *_: Any) -> None:
-        self.stop()
-MAX_AF_UNIX_PATH_LENGTH = 103
-def create_circus_watcher(
-    name: str,
-    args: list[str],
-    *,
-    cmd: str = sys.executable,
-    use_sockets: bool = True,
-    **kwargs: Any,
-) -> Watcher:
-    log_dir = os.environ.get("DYN_CIRCUS_LOG_DIR", None)
-    if log_dir is not None:
-        prefix = f"{log_dir}/{name}"
-        os.makedirs(prefix, exist_ok=True)
-        stdout_stream = {
-            "class": "FileStream",
-            "filename": f"{prefix}/output.log",
-            "backup_count": 10,
-        }
-        stderr_stream = {
-            "class": "FileStream",
-            "filename": f"{prefix}/error.log",
-            "backup_count": 10,
-        }
-    else:
-        stdout_stream = None
-        stderr_stream = None
-    return Watcher(
-        name=name,
-        cmd=shlex.quote(cmd) if psutil.POSIX else cmd,
-        args=args,
-        copy_env=True,
-        stop_children=True,
-        use_sockets=use_sockets,
-        graceful_timeout=86400,
-        respawn=os.environ.get("DYN_CIRCUS_RESPAWN", "false").lower()
-        in ("true", "1", "yes"),
-        stdout_stream=stdout_stream,
-        stderr_stream=stderr_stream,
-        **kwargs,
-    )
-def get_env_or_reserved_port(env_var):
-    port_env = os.environ.get(env_var)
-    if port_env:
-        return int(port_env)
-    else:
-        with reserve_free_port() as port:  # type: ignore
-            return port
-def create_arbiter(
-    watchers: list[Watcher], *, threaded: bool = False, **kwargs: Any
-) -> Arbiter:
-    endpoint_port = get_env_or_reserved_port("DYN_CIRCUS_ENDPOINT_PORT")
-    pubsub_port = get_env_or_reserved_port("DYN_CIRCUS_PUBSUB_PORT")
-    return Arbiter(
-        watchers,
-        endpoint=f"tcp://127.0.0.1:{endpoint_port}",
-        pubsub_endpoint=f"tcp://127.0.0.1:{pubsub_port}",
-        check_delay=kwargs.pop("check_delay", 10),
-        **kwargs,
-    )
-def path_to_uri(path: str) -> str:
-    """
-    Convert a path to a URI.
-    Args:
-        path: Path to convert to URI.
-    Returns:
-        URI string. (quoted, absolute)
-    """
-    return pathlib.PurePosixPath(path).as_uri()
-def _get_server_socket(
-    service: ServiceProtocol,
-    uds_path: str,
-) -> tuple[str, CircusSocket]:
-    """Create a Unix Domain Socket for a service.
-    Args:
-        service: The service to create a socket for
-        uds_path: Base directory for Unix Domain Sockets
-        port_stack: Not used in POSIX implementation, kept for interface compatibility
-    Returns:
-        Tuple of (socket URI, CircusSocket object)
-    Raises:
-        AssertionError: If socket path exceeds maximum length
-    """
-    socket_path = os.path.join(uds_path, f"{id(service)}.sock")
-    assert (
-        len(socket_path) < MAX_AF_UNIX_PATH_LENGTH
-    ), f"Socket path '{socket_path}' exceeds maximum length of {MAX_AF_UNIX_PATH_LENGTH}"
-    return path_to_uri(socket_path), CircusSocket(name=service.name, path=socket_path)
--- a/deploy/sdk/src/dynamo/sdk/cli/cli.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/cli.py
-#  SPDX-FileCopyrightText: Copyright (c) 2020 Atalaya Tech. Inc
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#  Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
-from __future__ import annotations
-import importlib.metadata
-import typer
-from rich.console import Console
-from dynamo.sdk.cli.env import env
-from dynamo.sdk.cli.run import run
-from dynamo.sdk.cli.serve import serve
-console = Console()
-cli = typer.Typer(
-    context_settings={"help_option_names": ["-h", "--help"]},
-    name="dynamo",
-    no_args_is_help=True,
-    pretty_exceptions_enable=False,
-)
-def version_callback(value: bool):
-    if value:
-        version = importlib.metadata.version("ai-dynamo")
-        console.print(
-            f"[bold green]Dynamo CLI[/bold green] version: [cyan]{version}[/cyan]"
-        )
-        raise typer.Exit()
-@cli.callback()
-def main(
-    version: bool = typer.Option(
-        False,
-        "--version",
-        "-v",
-        help="Show the application version and exit.",
-        callback=version_callback,
-        is_eager=True,
-    ),
-):
-    """
-    The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
-    At a high level, you use `serve` to run a set of dynamo services locally,
-    `build` and `containerize` to package them up for deployment, and then `cloud`
-    and `deploy` to deploy them to a K8s cluster running the Dynamo Cloud
-    """
-cli.command()(env)
-cli.command(
-    context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
-)(serve)
-cli.command(
-    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
-    add_help_option=False,
-)(run)
-if __name__ == "__main__":
-    cli()
--- a/deploy/sdk/src/dynamo/sdk/cli/env.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/env.py
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-from __future__ import annotations
-import platform
-import subprocess
-import sys
-import distro
-import pkg_resources
-def get_os_version() -> str:
-    """Get OS version."""
-    # TODO: Revisit once we need to support Windows based systems
-    return f"{distro.name()} {distro.version()}"
-def execute_subprocess_output(command: str) -> str:
-    """Execute a subprocess command and return the output."""
-    try:
-        out = subprocess.check_output(command, shell=True, stderr=subprocess.DEVNULL)
-        if not out.strip():
-            return "N/A"
-        return out.decode("utf-8").strip()
-    except subprocess.CalledProcessError:
-        return "N/A"
-def get_glibc_version() -> str:
-    """Get GLIBC version."""
-    return execute_subprocess_output("ldd --version | head -n 1 | awk '{print $NF}'")
-def get_gcc_version() -> str:
-    """Get GCC version."""
-    return execute_subprocess_output("gcc --version | head -n 1 | awk '{print $NF}'")
-def get_cmake_version() -> str:
-    """Get Cmake version."""
-    return execute_subprocess_output("cmake --version | head -n 1 | awk '{print $NF}'")
-def get_rust_version() -> str:
-    """Get Rust version."""
-    return execute_subprocess_output(
-        "rustc --version | head -n 1 | awk '{print $(NF-2)}'"
-    )
-def get_docker_version() -> str:
-    """Get Docker version."""
-    return execute_subprocess_output("docker --version | awk '{print $3}' | tr -d ','")
-def get_cpu_architecture() -> str:
-    """Get CPU architecture."""
-    return execute_subprocess_output("lscpu")
-def query_nvidia_smi(param: str) -> str:
-    """Get GPU information from nvidia-smi if available"""
-    return execute_subprocess_output(
-        f"nvidia-smi --query-gpu={param} --format=csv,noheader"
-    )
-def get_gpu_topo() -> str:
-    """Get GPU topology if available"""
-    return execute_subprocess_output("nvidia-smi topo -m")
-def get_cuda_version() -> str:
-    """Get CUDA version if available."""
-    return execute_subprocess_output(r"nvcc --version | grep -Po 'release \K\d+\.\d+'")
-def get_python_platform():
-    return platform.platform()
-def get_installed_packages() -> list[tuple[str, str]]:
-    """Get list of installed Python packages and their versions."""
-    return [(pkg.key, pkg.version) for pkg in pkg_resources.working_set]
-def get_python_packages() -> str:
-    """Get list of specified Python packages and their versions."""
-    installed_packages = get_installed_packages()
-    out = []
-    search_python_packages = [
-        "ai-dynamo",
-        "ai-dynamo-runtime",
-        "ai-dynamo-vllm",
-        "genai-perf",
-        "nixl",
-        "numpy",
-        "nvidia-cublas-cu12",
-        "nvidia-cuda-cupti-cu12",
-        "nvidia-cuda-nvrtc-cu12",
-        "nvidia-cuda-runtime-cu12",
-        "nvidia-cudnn-cu12",
-        "nvidia-cufft-cu12",
-        "nvidia-curand-cu12",
-        "nvidia-cusolver-cu12",
-        "nvidia-cusparse-cu12",
-        "nvidia-ml-py",
-        "nvidia-nccl-cu12",
-        "nvidia-nvjitlink-cu12",
-        "nvidia-nvtx-cu12",
-        "pyzmq",
-        "tensorrt_llm",
-        "torch",
-        "torchaudio",
-        "transformers",
-        "tritonclient",
-    ]
-    for pkg_name in search_python_packages:
-        version = next(
-            (version for name, version in installed_packages if name == pkg_name), None
-        )
-        if version:
-            out.append(f"{pkg_name}: {version}")
-        else:
-            out.append(f"{pkg_name}: Not installed")
-    return "\n".join(out)
-def env() -> None:
-    """Display information about the current environment."""
-    print("System Information:")
-    print(f"OS: {get_os_version()}")
-    print(f"Glibc Version: {get_glibc_version()}")
-    print(f"GCC Version: {get_gcc_version()}")
-    print(f"Cmake Version: {get_cmake_version()}")
-    print(f"Rust Version: {get_rust_version()}")
-    print(f"Docker Version: {get_docker_version()}")
-    print("\nCPU Information:")
-    print(f"{get_cpu_architecture()}")
-    # Python Environment
-    py_version = sys.version.split()[0]
-    print(f"\nPython Version: {py_version}")
-    print(f"Python Platform: {get_python_platform()}")
-    print("\nPython Packages:")
-    print(f"{get_python_packages()}")