feat: remove dynamo deployment from cli (#1742)

fbd1f8df · Biswa Panda · GitHub · 84e71e27 · fbd1f8df · fbd1f8df
Unverified commit fbd1f8df — authored Jul 08, 2025 by Biswa Panda; committed by GitHub on Jul 08, 2025
15 changed files
--- a/components/planner/README.md
+++ b/components/planner/README.md
@@ -124,6 +124,4 @@ For manual testing, you can use the controller_test.py file to add/remove compon

 The Kubernetes backend works by updating the replicas count of the DynamoGraphDeployment custom resource. When the planner determines that workers need to be scaled up or down based on workload metrics, it uses the Kubernetes API to patch the DynamoGraphDeployment resource specification, changing the replicas count for the appropriate worker component. The Kubernetes operator then reconciles this change by creating or terminating the necessary pods. This provides a seamless autoscaling experience in Kubernetes environments without requiring manual intervention.

-The Kubernetes backend will automatically be used by Planner when your pipeline is deployed with `dynamo deployment create`. By default, the planner will run in no-op mode, which means it will monitor metrics but not take scaling actions. To enable actual scaling, you should also specify `--Planner.no-operation=false`.
-
-
+The Planner automatically uses the Kubernetes backend when your pipeline is deployed using a DynamoGraphDeployment custom resource (CR). By default, the planner runs in no-op mode: it monitors metrics but takes no scaling actions. To enable actual scaling, also specify `--Planner.no-operation=false`.
--- a/deploy/inference-gateway/example/README.md
+++ b/deploy/inference-gateway/example/README.md
@@ -32,11 +32,11 @@ export DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built"
 ```bash
 # Deploy first graph
 export DEPLOYMENT_NAME=llm-agg1
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml
+# TODO: Deploy your service using a DynamoGraphDeployment CR.

 # Deploy second graph
 export DEPLOYMENT_NAME=llm-agg2
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml
+# TODO: Deploy your service using a DynamoGraphDeployment CR.
 ```

 3. **Deploy Inference Gateway**

--- a/deploy/sdk/src/dynamo/sdk/cli/cli.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/cli.py
@@ -23,8 +23,6 @@ import typer
 from rich.console import Console

 from dynamo.sdk.cli.build import build
-from dynamo.sdk.cli.deployment import app as deployment_app
-from dynamo.sdk.cli.deployment import deploy
 from dynamo.sdk.cli.env import env
 from dynamo.sdk.cli.run import run
 from dynamo.sdk.cli.serve import serve
@@ -76,8 +74,6 @@ cli.command(
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
    add_help_option=False,
 )(run)
-cli.add_typer(deployment_app, name="deployment")
-cli.command()(deploy)
 cli.command()(build)

 if __name__ == "__main__":

--- a/deploy/sdk/src/dynamo/sdk/cli/deployment.py
+++ b/deploy/sdk/src/dynamo/sdk/cli/deployment.py
--- a/deploy/sdk/src/dynamo/sdk/core/deploy/__init__.py
+++ b/deploy/sdk/src/dynamo/sdk/core/deploy/__init__.py
-#  SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#  SPDX-License-Identifier: Apache-2.0
-#  #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#  #
-#  http://www.apache.org/licenses/LICENSE-2.0
-#  #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#  Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
--- a/deploy/sdk/src/dynamo/sdk/core/deploy/consts.py
+++ b/deploy/sdk/src/dynamo/sdk/core/deploy/consts.py
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from enum import Enum
-
-
-class DeploymentTargetType(Enum):
-    """Enum for deployment target types."""
-
-    KUBERNETES = "kubernetes"
--- a/deploy/sdk/src/dynamo/sdk/core/deploy/kubernetes.py
+++ b/deploy/sdk/src/dynamo/sdk/core/deploy/kubernetes.py
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import time
-import typing as t
-
-import requests
-
-from dynamo.sdk.core.protocol.deployment import (
-    Deployment,
-    DeploymentManager,
-    DeploymentResponse,
-    DeploymentStatus,
-)
-from dynamo.sdk.lib.utils import upload_graph
-
-
-class KubernetesDeploymentManager(DeploymentManager):
-    """
-    Implementation of DeploymentManager that talks to the dynamo_store deployment API.
-    Accepts **kwargs for backend-specific options.
-    Handles error reporting and payload construction according to the API schema.
-    Raises exceptions for errors; CLI handles user interaction.
-    """
-
-    def __init__(self, endpoint: str):
-        self.endpoint = endpoint.rstrip("/")
-        self.session = requests.Session()
-
-    def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse:
-        """Create a new deployment. Ensures all components and versions are registered/uploaded before creating the deployment."""
-        # For each service/component in the deployment, upload it to the API store
-        if not deployment.graph:
-            raise ValueError(
-                "Deployment graph must be provided in the format <name>:<version>"
-            )
-        upload_graph(
-            endpoint=self.endpoint,
-            graph=deployment.graph,
-            entry_service=deployment.entry_service,
-            session=self.session,
-            **kwargs,
-        )
-
-        # Now create the deployment
-        dev = kwargs.get("dev", False)
-        payload = {
-            "name": deployment.name,
-            "component": deployment.graph or deployment.namespace,
-            "dev": dev,
-            "envs": deployment.envs,
-        }
-        payload = {k: v for k, v in payload.items() if v is not None}
-        url = f"{self.endpoint}/api/v2/deployments"
-        try:
-            resp = self.session.post(url, json=payload)
-            resp.raise_for_status()
-            return resp.json()
-        except requests.HTTPError as e:
-            status = e.response.status_code if e.response is not None else None
-            msg = e.response.text if e.response is not None else str(e)
-            if "already exists" in msg:
-                raise RuntimeError((409, msg, None)) from e
-            raise RuntimeError((status, msg, url)) from e
-
-    def update_deployment(
-        self, deployment_id: str, deployment: Deployment, **kwargs
-    ) -> None:
-        """Update an existing deployment."""
-        access_authorization = kwargs.get("access_authorization", False)
-        payload = {
-            "name": deployment.name,
-            "envs": deployment.envs,
-            "services": deployment.services,
-            "access_authorization": access_authorization,
-        }
-        payload = {k: v for k, v in payload.items() if v is not None}
-        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
-        try:
-            resp = self.session.put(url, json=payload)
-            resp.raise_for_status()
-        except requests.HTTPError as e:
-            status = e.response.status_code if e.response is not None else None
-            msg = e.response.text if e.response is not None else str(e)
-            raise RuntimeError((status, msg, url))
-
-    def get_deployment(self, deployment_id: str) -> DeploymentResponse:
-        """Get deployment details."""
-        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
-        try:
-            resp = self.session.get(url)
-            resp.raise_for_status()
-            return resp.json()
-        except requests.HTTPError as e:
-            status = e.response.status_code if e.response is not None else None
-            msg = e.response.text if e.response is not None else str(e)
-            raise RuntimeError((status, msg, url)) from e
-
-    def list_deployments(self) -> t.List[DeploymentResponse]:
-        """List all deployments."""
-        url = f"{self.endpoint}/api/v2/deployments"
-        try:
-            resp = self.session.get(url)
-            resp.raise_for_status()
-            data = resp.json()
-            return data.get("items", [])
-        except requests.HTTPError as e:
-            msg = e.response.text if e.response is not None else str(e)
-            raise RuntimeError(
-                (e.response.status_code if e.response else None, msg, url)
-            )
-
-    def delete_deployment(self, deployment_id: str) -> None:
-        """Delete a deployment."""
-        url = f"{self.endpoint}/api/v2/deployments/{deployment_id}"
-        try:
-            resp = self.session.delete(url)
-            resp.raise_for_status()
-        except requests.HTTPError as e:
-            status = e.response.status_code if e.response is not None else None
-            msg = e.response.text if e.response is not None else str(e)
-            raise RuntimeError((status, msg, url)) from e
-
-    def get_status(
-        self,
-        deployment_id: str,
-    ) -> DeploymentStatus:
-        dep = self.get_deployment(deployment_id)
-        status = dep.get("status", "unknown")
-        if status == "running":
-            return DeploymentStatus.RUNNING
-        elif status == "failed":
-            return DeploymentStatus.FAILED
-        elif status == "deploying":
-            return DeploymentStatus.IN_PROGRESS
-        elif status == "terminated":
-            return DeploymentStatus.TERMINATED
-        else:
-            return DeploymentStatus.PENDING
-
-    def wait_until_ready(
-        self, deployment_id: str, timeout: int = 3600
-    ) -> t.Tuple[DeploymentResponse, bool]:
-        start = time.time()
-        while time.time() - start < timeout:
-            dep = self.get_deployment(deployment_id)
-            status = self.get_status(deployment_id)
-            if status == DeploymentStatus.RUNNING:
-                return dep, True
-            elif status == DeploymentStatus.FAILED:
-                return dep, False
-            time.sleep(5)
-        return dep, False
-
-    def get_endpoint_urls(
-        self,
-        deployment_id: str,
-    ) -> t.List[str]:
-        dep = self.get_deployment(deployment_id)
-        return dep.get("urls", [])
--- a/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py
+++ b/deploy/sdk/src/dynamo/sdk/core/protocol/deployment.py
@@ -14,11 +14,7 @@
 # limitations under the License.

 import typing as t
-from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from enum import Enum
-
-import typer


 @dataclass
@@ -75,36 +71,6 @@ class Resources:
                )


-class DeploymentStatus(str, Enum):
-    """Status of a dynamo deployment."""
-
-    PENDING = "pending"
-    IN_PROGRESS = "in progress"
-    RUNNING = "running"
-    FAILED = "failed"
-    TERMINATED = "terminate"
-    SCALED_TO_ZERO = "scaled to zero"
-
-    @property
-    def color(self) -> str:
-        return {
-            DeploymentStatus.RUNNING: "green",
-            DeploymentStatus.IN_PROGRESS: "yellow",
-            DeploymentStatus.PENDING: "yellow",
-            DeploymentStatus.FAILED: "red",
-            DeploymentStatus.TERMINATED: "red",
-            DeploymentStatus.SCALED_TO_ZERO: "yellow",
-        }.get(self, "white")
-
-
-@dataclass
-class ScalingPolicy:
-    """Scaling policy."""
-
-    policy: str
-    parameters: t.Dict[str, t.Union[int, float, str]] = field(default_factory=dict)
-
-
 @dataclass
 class Env:
    """Environment variable."""
@@ -126,149 +92,5 @@ class Service:
    resources: Resources | None = None
    envs: t.List[Env] = field(default_factory=list)
    secrets: t.List[str] = field(default_factory=list)
-    scaling: ScalingPolicy = field(default_factory=lambda: ScalingPolicy(policy="none"))
    apis: dict = field(default_factory=dict)
    size_bytes: int = 0
-
-
-@dataclass
-class Deployment:
-    """Graph deployment."""
-
-    name: str
-    namespace: str
-    graph: t.Optional[str] = None
-    entry_service: t.Optional[Service] = None
-    envs: t.Optional[t.List[t.Dict[str, t.Any]]] = None
-
-
-# Type alias for deployment responses (e.g., from backend APIs)
-DeploymentResponse = t.Dict[str, t.Any]
-
-
-@dataclass
-class DeploymentConfig:
-    """Configuration object for deployment operations.
-
-    Consolidates all deployment parameters including graph configuration,
-    environment variables, and deployment settings.
-    """
-
-    # Core deployment settings
-    graph: str
-    endpoint: str
-    name: t.Optional[str] = None
-    target: str = "kubernetes"
-    dev: bool = False
-
-    # Configuration and timing
-    config_file: t.Optional[typer.FileText] = None
-    wait: bool = True
-    timeout: int = 3600
-
-    # Environment variables
-    envs: t.Optional[t.List[str]] = None
-    envs_from_secret: t.Optional[t.List[str]] = None
-    env_secrets_name: t.Optional[str] = "dynamo-env-secrets"
-
-
-class DeploymentManager(ABC):
-    """Interface for managing dynamo graph deployments."""
-
-    @abstractmethod
-    def create_deployment(self, deployment: Deployment, **kwargs) -> DeploymentResponse:
-        """Create new deployment.
-
-        Args:
-            deployment: Deployment configuration
-            **kwargs: Additional backend-specific arguments
-
-        Returns:
-            The created deployment
-        """
-        pass
-
-    @abstractmethod
-    def update_deployment(self, deployment_id: str, deployment: Deployment) -> None:
-        """Update an existing deployment.
-
-        Args:
-            deployment_id: The ID of the deployment to update
-            deployment: New deployment configuration
-        """
-        pass
-
-    @abstractmethod
-    def get_deployment(self, deployment_id: str) -> DeploymentResponse:
-        """Get deployment details.
-
-        Args:
-            deployment_id: The ID of the deployment
-
-        Returns:
-            Dictionary containing deployment details
-        """
-        pass
-
-    @abstractmethod
-    def list_deployments(self) -> t.List[DeploymentResponse]:
-        """List all deployments.
-
-        Returns:
-            List of dictionaries containing deployment id and details
-        """
-        pass
-
-    @abstractmethod
-    def delete_deployment(self, deployment_id: str) -> None:
-        """Delete a deployment.
-
-        Args:
-            deployment_id: The ID of the deployment to delete
-        """
-        pass
-
-    @abstractmethod
-    def get_status(
-        self,
-        deployment_id: str,
-    ) -> DeploymentStatus:
-        """Get the current status of a deployment.
-
-        Args (one of):
-            deployment_id: The ID of the deployment
-
-        Returns:
-            The current status of the deployment
-        """
-        pass
-
-    @abstractmethod
-    def wait_until_ready(
-        self, deployment_id: str, timeout: int = 3600
-    ) -> t.Tuple[DeploymentResponse, bool]:
-        """Wait until a deployment is ready.
-
-        Args:
-            deployment_id: The ID of the deployment
-            timeout: Maximum time to wait in seconds
-
-        Returns:
-            Tuple of deployment response and a boolean indicating if the deployment became ready
-        """
-        pass
-
-    @abstractmethod
-    def get_endpoint_urls(
-        self,
-        deployment_id: str,
-    ) -> t.List[str]:
-        """Get the list of endpoint urls attached to a deployment.
-
-        Args (one of):
-            deployment_id: The ID of the deployment
-
-        Returns:
-            List of deployment's endpoint urls
-        """
-        pass
--- a/deploy/sdk/tests/test_deployment.sh
+++ b/deploy/sdk/tests/test_deployment.sh
@@ -32,4 +32,4 @@ DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk

 # Step.3: Deploy!
 echo $DYNAMO_TAG
-dynamo deployment create $DYNAMO_TAG --no-wait -n $DEPLOYMENT_NAME
+# TODO: Deploy your service using a DynamoGraphDeployment CR.
--- a/docs/examples/hello_world.md
+++ b/docs/examples/hello_world.md
@@ -124,8 +124,7 @@ cd $PROJECT_ROOT/examples/hello_world
 DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk '{ print $3 }' | sed 's/\.$//')

 # Deploy to Kubernetes
-export DEPLOYMENT_NAME=ci-hw
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME
+# TODO: Deploy your service using a DynamoGraphDeployment CR.
 ```

 ### Testing the Deployment

--- a/docs/examples/llm_deployment.md
+++ b/docs/examples/llm_deployment.md
@@ -243,7 +243,7 @@ DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" |  awk

 # Deploy to Kubernetes
 export DEPLOYMENT_NAME=llm-agg
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml
+# TODO: Deploy your service using a DynamoGraphDeployment CR.
 ```

 **Note**: Optionally add `--Planner.no-operation=false` at the end of the deployment command to enable the planner component to take scaling actions on your deployment.

--- a/docs/guides/cli_overview.md
+++ b/docs/guides/cli_overview.md
@@ -91,24 +91,4 @@ cd examples/hello_world
 dynamo build hello_world:Frontend
 ```

-### `deploy`
-
-Use `deploy` to create a pipeline on Dynamo Cloud using either interactive prompts or a YAML configuration file. For more details, see [Deploying Inference Graphs to Kubernetes](dynamo_deploy/README.md).
-
-#### Usage
-```bash
-dynamo deploy [PIPELINE]
-```
-
-#### Arguments
-* `PIPELINE`: The pipeline to deploy; defaults to *None*; required
-
-#### Flags
-* `--name`/`-n`: Set the deployment name. Defaults to *None*; required
-* `--config-file`/`-f`: Specify the configuration file path. Defaults to *None*; required
-* `--wait`/`--no-wait`: Choose whether to wait for deployment readiness. Defaults to wait
-* `--timeout`: Set maximum deployment time in seconds. Defaults to 3600
-* `--endpoint`/`-e`: Specify the Dynamo Cloud deployment endpoint. Defaults to *None*; required
-* `--help`/`-h`: Display command help
-
 For a detailed deployment example, see [Operator Deployment](dynamo_deploy/operator_deployment.md).
--- a/docs/guides/dynamo_deploy/operator_deployment.md
+++ b/docs/guides/dynamo_deploy/operator_deployment.md
@@ -114,15 +114,7 @@ DYNAMO_TAG=$(dynamo build hello_world:Frontend | grep "Successfully built" | awk

 ### 3. Deploy to Kubernetes

-Deploy your service using the Dynamo deployment command:
-
-```bash
-# Set your Helm release name
-export DEPLOYMENT_NAME=hello-world
-
-# Create the deployment
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME
-```
+TODO: Deploy your service using a DynamoGraphDeployment CR.

 #### Managing Deployments


--- a/examples/llm/README.md
+++ b/examples/llm/README.md
@@ -228,7 +228,7 @@ DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" |  awk

 # Deploy to Kubernetes
 export DEPLOYMENT_NAME=llm-agg
-dynamo deployment create $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg.yaml
+# TODO: Deploy your service using a DynamoGraphDeployment CR.
 ```

 **Note**: To avoid rate limiting from unauthenticated requests to HuggingFace (HF), you can provide your `HF_TOKEN` as a secret in your deployment. See the [operator deployment guide](../../docs/guides/dynamo_deploy/operator_deployment.md#referencing-secrets-in-your-deployment) for instructions on referencing secrets like `HF_TOKEN` in your deployment configuration.

--- a/examples/multimodal/README.md
+++ b/examples/multimodal/README.md
@@ -207,23 +207,7 @@ export DYNAMO_CLOUD=http://localhost:8080  # If using port-forward
 # Build the Dynamo base image (see operator_deployment.md for details)
 export DYNAMO_IMAGE=<your-registry>/<your-image-name>:<your-tag>

-# Build the service
-cd $PROJECT_ROOT/examples/multimodal
-DYNAMO_TAG=$(dynamo build graphs.agg:Frontend | grep "Successfully built" |  awk '{ print $NF }' | sed 's/\.$//')
-# For disaggregated serving:
-# DYNAMO_TAG=$(dynamo build graphs.disagg:Frontend | grep "Successfully built" |  awk '{ print $NF }' | sed 's/\.$//')
-
-# Deploy to Kubernetes
-export DEPLOYMENT_NAME=multimodal-agg
-# For aggregated serving with LLaVA:
-dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-llava.yaml
-# For aggregated serving with Qwen2.5-VL:
-# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-qwen.yaml
-# For aggregated serving with Phi3V:
-# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/agg-phi3v.yaml
-# For disaggregated serving:
-# export DEPLOYMENT_NAME=multimodal-disagg
-# dynamo deploy $DYNAMO_TAG -n $DEPLOYMENT_NAME -f ./configs/disagg.yaml
+# TODO: Deploy this example using a DynamoGraphDeployment CR.
 ```

 **Note**: To avoid rate limiting from unauthenticated requests to HuggingFace (HF), you can provide your `HF_TOKEN` as a secret in your deployment. See the [operator deployment guide](../../docs/guides/dynamo_deploy/operator_deployment.md#referencing-secrets-in-your-deployment) for instructions on referencing secrets like `HF_TOKEN` in your deployment configuration.