"test/git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "30bf79f1ac709477909c875387b760c52cc9e5b6"
Commit df51a622 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: refactor examples and clean CLI (#195)

parent 0517f757
......@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command:
from bentoml._internal.configuration import BENTOML_VERSION
from bentoml._internal.context import server_context
from bentoml_cli.bentos import bento_command
from bentoml_cli.cloud import cloud_command
from bentoml_cli.containerize import containerize_command
from bentoml_cli.deployment import deployment_command, develop_command
from bentoml_cli.env import env_command
from bentoml_cli.models import model_command
from bentoml_cli.secret import secret_command
from bentoml_cli.utils import BentoMLCommandGroup, get_entry_points
from bentoml_cli.utils import get_entry_points
from dynamo.sdk.cli.delete import delete_command
from dynamo.sdk.cli.deploy import deploy_command
from dynamo.sdk.cli.list import list_command
from dynamo.sdk.cli.run import run_command
from dynamo.sdk.cli.serve import serve_command
from dynamo.sdk.cli.server import cloud_command
from dynamo.sdk.cli.start import start_command
from dynamo.sdk.cli.utils import DynamoCommandGroup
server_context.service_type = "cli"
CONTEXT_SETTINGS = {"help_option_names": ("-h", "--help")}
@click.group(cls=BentoMLCommandGroup, context_settings=CONTEXT_SETTINGS)
@click.group(cls=DynamoCommandGroup, context_settings=CONTEXT_SETTINGS)
@click.version_option(BENTOML_VERSION, "-v", "--version")
def bentoml_cli(): # TODO: to be renamed to something....
""" """
"""
The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
It takes inspiration from and leverages core pieces of the BentoML deployment stack.
At a high level, you use `serve` to run a set of dynamo services locally,
`build` and `containerize` to package them up for deployment, and then `server`
and `deploy` to deploy them to a K8s cluster running the Dynamo Server
"""
# Add top-level CLI commands
bentoml_cli.add_command(env_command)
bentoml_cli.add_command(cloud_command)
bentoml_cli.add_command(model_command)
bentoml_cli.add_subcommands(bento_command)
bentoml_cli.add_single_command(bento_command, "build")
bentoml_cli.add_subcommands(start_command)
bentoml_cli.add_subcommands(serve_command)
bentoml_cli.add_subcommands(run_command)
bentoml_cli.add_command(containerize_command)
bentoml_cli.add_command(deploy_command)
bentoml_cli.add_command(develop_command)
bentoml_cli.add_command(deployment_command)
bentoml_cli.add_command(secret_command)
bentoml_cli.add_command(list_command)
bentoml_cli.add_command(delete_command)
# Load commands from extensions
for ep in get_entry_points("bentoml.commands"):
bentoml_cli.add_command(ep.load())
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
import bentoml
import click
from bentoml._internal.cloud.base import Spinner
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich.console import Console
logger = logging.getLogger(__name__)
def build_delete_command() -> click.Group:
    """Build the ``delete`` CLI command group.

    Returns a click group with two subcommands:
      * ``bentos``       -- delete bentos from the local BentoML store.
      * ``deployments``  -- delete DynamoDeployment custom resources from a
                            Kubernetes namespace.
    """

    @click.group(name="delete", cls=BentoMLCommandGroup)
    def cli():
        """Delete resources"""
        pass

    @cli.command(name="bentos")
    @click.argument("bento_tag", type=click.STRING, required=False)
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all bentos in local store",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_bentos(
        bento_tag: str | None = None,
        all: bool = False,  # NOTE: shadows the builtin `all` inside this function
        force: bool = False,
    ):
        """
        Delete bentos from local store
        Args:
            bento_tag: Tag of the bento to delete
            all: Delete all bentos
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)
        # Validate arguments: exactly one of BENTO_TAG / --all must be given.
        if not bento_tag and not all:
            raise click.ClickException(
                "Either specify a bento tag or use --all to delete all bentos"
            )
        if bento_tag and all:
            raise click.ClickException("Cannot specify both a bento tag and --all flag")
        with Spinner(console=console) as spinner:
            try:
                # Get bentos to delete
                bentos_to_delete = []
                if all:
                    # Get all bentos
                    spinner.update("Fetching all bentos")
                    bentos = bentoml.list()
                    bentos_to_delete = [str(bento.tag) for bento in bentos]
                    if not bentos_to_delete:
                        spinner.log("No bentos found in local store")
                        return
                else:
                    # Check if the specified bento exists
                    if bento_tag is not None:
                        bentos_to_delete = [bento_tag]
                    else:
                        # This should never happen due to earlier validation, but handle it anyway
                        spinner.log("[bold red]No bento tag specified[/]")
                        return
                # Confirm deletion if not forced
                if not force:
                    # Pause the spinner so the confirmation prompt renders cleanly.
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(bentos_to_delete)} bentos from local store?"
                    else:
                        message = f"Are you sure you want to delete bento '{bento_tag}' from local store?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()
                # Delete bentos
                for tag in bentos_to_delete:
                    spinner.update(f"Deleting bento '{tag}'")
                    try:
                        bentoml.delete(tag)
                        spinner.log(f"[green]Successfully deleted bento '{tag}'[/]")
                    except Exception as e:
                        # Per-bento failures are logged but do not abort the batch.
                        spinner.log(
                            f"[bold red]Failed to delete bento '{tag}': {str(e)}[/]"
                        )
                        logger.error(f"Failed to delete bento '{tag}'", exc_info=True)
                # Final summary
                if all:
                    spinner.log(
                        f"[bold green]Deleted {len(bentos_to_delete)} bentos from local store[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
                raise SystemExit(1)

    @cli.command(name="deployments")
    @click.argument("deployment_name", type=click.STRING, required=False)
    @click.option(
        "--namespace",
        type=click.STRING,
        default="default",
        help="Kubernetes namespace containing the deployments",
    )
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all deployments in the namespace",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_deployments(
        deployment_name: str | None = None,
        namespace: str = "default",
        all: bool = False,  # NOTE: shadows the builtin `all` inside this function
        force: bool = False,
    ):
        """
        Delete deployments from a Kubernetes namespace
        Args:
            deployment_name: Name of the deployment to delete
            namespace: Kubernetes namespace containing the deployments
            all: Delete all deployments in the namespace
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)
        # Validate arguments: exactly one of DEPLOYMENT_NAME / --all must be given.
        if not deployment_name and not all:
            raise click.ClickException(
                "Either specify a deployment name or use --all to delete all deployments"
            )
        if deployment_name and all:
            raise click.ClickException(
                "Cannot specify both a deployment name and --all flag"
            )
        # Load Kubernetes configuration
        try:
            config.load_kube_config()
            api = client.CustomObjectsApi()
        except Exception as e:
            logger.error("Failed to load Kubernetes configuration", exc_info=True)
            raise click.ClickException(
                f"Failed to load Kubernetes configuration: {str(e)}"
            )
        # Define the group, version, and plural for the CRD
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"
        with Spinner(console=console) as spinner:
            try:
                # Get deployments to delete
                deployments_to_delete = []
                if all:
                    # Get all deployments in the namespace
                    spinner.update(
                        f"Fetching all deployments in namespace '{namespace}'"
                    )
                    deployments = api.list_namespaced_custom_object(
                        group=group,
                        version=version,
                        namespace=namespace,
                        plural=plural,
                    )
                    deployments_to_delete = [
                        item["metadata"]["name"]
                        for item in deployments.get("items", [])
                    ]
                    if not deployments_to_delete:
                        spinner.log(f"No deployments found in namespace '{namespace}'")
                        return
                else:
                    # Check if the specified deployment exists
                    try:
                        api.get_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=deployment_name,
                        )
                        deployments_to_delete = [deployment_name]
                    except client.rest.ApiException as e:
                        # 404 means the deployment does not exist; anything else is fatal.
                        if e.status == 404:
                            spinner.log(
                                f"[bold red]Deployment '{deployment_name}' not found in namespace '{namespace}'[/]"
                            )
                            return
                        raise
                # Confirm deletion if not forced
                if not force:
                    # Pause the spinner so the confirmation prompt renders cleanly.
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(deployments_to_delete)} deployments in namespace '{namespace}'?"
                    else:
                        message = f"Are you sure you want to delete deployment '{deployment_name}' in namespace '{namespace}'?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()
                # Delete deployments
                for name in deployments_to_delete:
                    spinner.update(
                        f"Deleting deployment '{name}' in namespace '{namespace}'"
                    )
                    try:
                        api.delete_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=name,
                        )
                        spinner.log(
                            f"[green]Successfully deleted deployment '{name}'[/]"
                        )
                    except client.rest.ApiException as e:
                        # A 404 here is benign (already gone); other failures are
                        # logged but do not abort the batch.
                        if e.status == 404:
                            spinner.log(
                                f"[yellow]Deployment '{name}' not found or already deleted[/]"
                            )
                        else:
                            spinner.log(
                                f"[bold red]Failed to delete deployment '{name}': {str(e)}[/]"
                            )
                            logger.error(
                                f"Failed to delete deployment '{name}'", exc_info=True
                            )
                # Final summary
                if all:
                    spinner.log(
                        f"[bold green]Deleted {len(deployments_to_delete)} deployments from namespace '{namespace}'[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
                raise SystemExit(1)

    return cli


# Module-level command object registered with the top-level CLI.
delete_command = build_delete_command()
......@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
@click.group(name="deploy")
def deploy_command_group():
"""Deploy 🍱 to a cluster"""
"""Deploy to a cluster"""
pass
......@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N
def build_deploy_command() -> click.Command:
from bentoml._internal.utils import add_experimental_docstring
@click.command(name="deploy")
@click.argument("bento", type=click.STRING, default=".")
@click.option("-n", "--name", type=click.STRING, help="Deployment name")
......@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command:
)
@click.option("--strategy", type=click.STRING, default="rolling-update")
@click.option("--version", type=click.STRING, help="Version tag for the Bento")
@add_experimental_docstring
def deploy_command(
bento: str | None,
name: str | None,
......@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command:
version: str | None = None,
):
"""
Deploy 🍱 to a cluster
Deploy a set of Dynamo services in a Bento to a K8s cluster
\b
BENTO is the serving target, it can be:
- a tag to a Bento in local Bento store
- a folder containing a valid 'bentofile.yaml'
- a path to a built Bento
"""
from bentoml._internal.log import configure_server_logging
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
import os
import typing as t
import bentoml
import click
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich import print as rich_print
from rich.table import Table
def build_list_command() -> click.Group:
    """Build the ``list`` CLI command group.

    Returns a click group with two subcommands:
      * ``bentos``       -- list bentos in the local BentoML store.
      * ``deployments``  -- list DynamoDeployment custom resources in a
                            Kubernetes namespace.
    """

    @click.group(name="list", cls=BentoMLCommandGroup)
    def cli():
        """List resources"""
        pass

    @cli.command(name="bentos")
    def list_bentos():
        """List all bentos in local store"""
        bentos = bentoml.list()
        table = Table(box=None, expand=True)
        table.add_column("Tag", overflow="fold")
        table.add_column("Service", overflow="fold")
        table.add_column("Created At", overflow="fold")
        for bento in bentos:
            table.add_row(
                str(bento.tag),
                bento.info.service,
                bento.info.creation_time.strftime("%Y-%m-%d %H:%M:%S"),
            )
        rich_print(table)

    @cli.command(name="deployments")
    @click.option(
        "--namespace",
        type=click.STRING,
        # Fix: previously no default, so omitting --namespace passed None to
        # the Kubernetes API (a required parameter) and the call failed.
        # Matches the default used by `delete deployments`.
        default="default",
        show_default=True,
        help="Kubernetes namespace",
    )
    @click.option("--cluster", type=click.STRING, help="Cluster name")
    @click.option(
        "-o",
        "--output",
        help="Display the output of this command.",
        type=click.Choice(["json", "table"]),
        default="table",
    )
    def list_deployments(
        namespace: str = "default",
        cluster: str | None = None,
        output: t.Literal["json", "table"] = "table",
    ):
        """List deployments"""
        # NOTE(review): `cluster` is accepted but not currently used to select
        # a kubeconfig context — confirm intended behavior.
        config.load_kube_config()
        api = client.CustomObjectsApi()
        # Define the group, version, and plural for the CRD
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"
        # Get the deployments from the Kubernetes API
        deployments = api.list_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
        )
        if output == "json":
            rich_print(json.dumps(deployments, indent=2))
            return
        # Create table for output
        table = Table(box=None, expand=True)
        table.add_column("Name", overflow="fold")
        table.add_column("Namespace", overflow="fold")
        table.add_column("Status", overflow="fold")
        table.add_column("Created At", overflow="fold")
        table.add_column("Replicas", overflow="fold")
        table.add_column("Resources", overflow="fold")
        table.add_column("URL", overflow="fold")
        ingress_suffix = os.getenv("DYNAMO_INGRESS_SUFFIX", "local")
        for item in deployments.get("items", []):
            metadata = item.get("metadata", {})
            spec = item.get("spec", {})
            # Only the "main" service of the deployment is summarized here.
            services = spec.get("services", {}).get("main", {}).get("spec", {})
            resources = services.get("resources", {})
            ingress = services.get("ingress", {})
            # Format resources as "requests / limits" per resource kind
            resources_str = (
                f"CPU: {resources.get('requests', {}).get('cpu', 'N/A')} / {resources.get('limits', {}).get('cpu', 'N/A')}\n"
                f"Memory: {resources.get('requests', {}).get('memory', 'N/A')} / {resources.get('limits', {}).get('memory', 'N/A')}\n"
                f"GPU: {resources.get('requests', {}).get('gpu', 'N/A')} / {resources.get('limits', {}).get('gpu', 'N/A')}"
            )
            # Format URL (only meaningful when ingress is enabled)
            url = (
                f"https://{ingress.get('hostPrefix', 'N/A')}.{ingress_suffix}"
                if ingress.get("enabled", False)
                else "N/A"
            )
            table.add_row(
                metadata.get("name", "N/A"),
                metadata.get("namespace", "N/A"),
                item.get("status", {}).get("state", "Unknown"),
                metadata.get("creationTimestamp", "N/A"),
                f"{services.get('autoscaling', {}).get('minReplicas', 'N/A')} - {services.get('autoscaling', {}).get('maxReplicas', 'N/A')}",
                resources_str,
                url,
            )
        rich_print(table)

    return cli


# Module-level command object registered with the top-level CLI.
list_command = build_list_command()
......@@ -23,9 +23,7 @@ import click
def build_run_command() -> click.Group:
from bentoml_cli.utils import BentoMLCommandGroup
@click.group(name="run", cls=BentoMLCommandGroup)
@click.group(name="run")
def cli():
pass
......
......@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
def build_serve_command() -> click.Group:
from bentoml._internal.log import configure_server_logging
from bentoml_cli.env_manager import env_manager
from bentoml_cli.utils import AliasCommand, BentoMLCommandGroup
from bentoml_cli.utils import AliasCommand
@click.group(name="serve", cls=BentoMLCommandGroup)
@click.group(name="serve")
def cli():
pass
......@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group:
timeout_graceful_shutdown: int | None,
**attrs: t.Any,
) -> None:
"""Start a HTTP BentoServer from a given 🍱
"""Locally run connected Dynamo services
\b
You can also pass service-specific configuration options using --ServiceName.param=value format.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import sys
import click
import rich
from bentoml._internal.cloud.client import RestApiClient
from bentoml._internal.cloud.config import (
DEFAULT_ENDPOINT,
CloudClientConfig,
CloudClientContext,
)
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.utils.cattr import bentoml_cattr
from bentoml.exceptions import CLIException, CloudRESTApiClientError
# NOTE: exported as `cloud_command` but exposed to users as `dynamo server`.
@click.group(name="server")
def cloud_command():
    """Interact with your Dynamo Server"""
@cloud_command.command()
@click.option(
    "--endpoint",
    type=click.STRING,
    help="Dynamo Server endpoint",
    default=DEFAULT_ENDPOINT,
    envvar="DYNAMO_SERVER_API_ENDPOINT",
    show_default=True,
    show_envvar=True,
    required=True,
)
@click.option(
    "--api-token",
    type=click.STRING,
    help="Dynamo Server user API token",
    envvar="DYNAMO_SERVER_API_KEY",
    show_envvar=True,
    required=True,
)
def login(endpoint: str, api_token: str) -> None:  # type: ignore
    """Connect to your Dynamo Server. You can find deployment instructions for this in our docs

    Validates the token against the server, then persists the credentials as
    the current cloud context. Exits non-zero if validation fails.
    """
    try:
        cloud_rest_client = RestApiClient(endpoint, api_token)
        user = cloud_rest_client.v1.get_current_user()
        if user is None:
            raise CLIException("current user is not found")
        org = cloud_rest_client.v1.get_current_organization()
        if org is None:
            raise CLIException("current organization is not found")
        current_context_name = CloudClientConfig.get_config().current_context_name
        cloud_context = BentoMLContainer.cloud_context.get()
        # Prefer an explicitly configured context name; fall back to the
        # current context from the on-disk config.
        ctx = CloudClientContext(
            name=cloud_context if cloud_context is not None else current_context_name,
            endpoint=endpoint,
            api_token=api_token,
            email=user.email,
        )
        ctx.save()
        rich.print(
            f":white_check_mark: Configured BentoCloud credentials (current-context: {ctx.name})"
        )
        rich.print(
            f":white_check_mark: Logged in as [blue]{user.email}[/] at [blue]{org.name}[/] organization"
        )
    except CloudRESTApiClientError as e:
        if e.error_code == 401:
            rich.print(
                f":police_car_light: Error validating token: HTTP 401: Bad credentials ({endpoint}/api-token)",
                file=sys.stderr,
            )
        else:
            rich.print(
                f":police_car_light: Error validating token: HTTP {e.error_code}",
                file=sys.stderr,
            )
        # Fix: previously a failed login still exited with status 0; signal
        # the failure to callers/scripts (consistent with other commands).
        raise SystemExit(1)
@cloud_command.command()
def current_context() -> None:  # type: ignore
    """Get current cloud context."""
    # Fetch the active context from the on-disk config and dump it as JSON.
    active = CloudClientConfig.get_config().get_context()
    rich.print_json(data=bentoml_cattr.unstructure(active))
@cloud_command.command()
def list_context() -> None:  # type: ignore
    """List all available context."""
    # Print only the context names, not full credential records.
    names = [ctx.name for ctx in CloudClientConfig.get_config().contexts]
    rich.print_json(data=bentoml_cattr.unstructure(names))
@cloud_command.command()
@click.argument("context_name", type=click.STRING)
def update_current_context(context_name: str) -> None:  # type: ignore
    """Update current context"""
    # Persist the switch, then report the context that is now active.
    cfg = CloudClientConfig.get_config()
    ctx = cfg.set_current_context(context_name)
    rich.print(f"Successfully switched to context: {ctx.name}")
......@@ -484,7 +484,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component()
)
else 'Starting production %s BentoServer from "%s" (Press CTRL+C to quit)'
else "Starting %s (Press CTRL+C to quit)"
),
*(
(svc.name, *svc.dynamo_address(), scheme, log_host, port)
......@@ -492,7 +492,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component()
)
else (scheme.upper(), bento_identifier)
else (bento_identifier,)
),
),
)
......
......@@ -19,24 +19,32 @@ import json
import logging
import os
import sys
import typing as t
from typing import Optional
from urllib.parse import urlparse
import click
import rich
import yaml
from dynamo.sdk.cli.serve import _parse_service_args
logger = logging.getLogger(__name__)
def build_start_command() -> click.Group:
from bentoml._internal.utils import add_experimental_docstring
from bentoml_cli.utils import BentoMLCommandGroup
@click.group(name="start", cls=BentoMLCommandGroup)
@click.group(name="start")
def cli():
pass
@cli.command()
@cli.command(
context_settings=dict(
ignore_unknown_options=True,
allow_extra_args=True,
),
)
@click.argument("bento", type=click.STRING, default=".")
@click.option(
"--service-name",
......@@ -46,6 +54,12 @@ def build_start_command() -> click.Group:
envvar="BENTOML_SERVE_SERVICE_NAME",
help="specify the runner name to serve",
)
@click.option(
"-f",
"--file",
type=click.Path(exists=True),
help="Path to YAML config file for service configuration",
)
@click.option(
"--depends",
type=click.STRING,
......@@ -137,15 +151,24 @@ def build_start_command() -> click.Group:
help="Reload Service when code changes detected",
default=False,
)
@click.option(
"--dry-run",
is_flag=True,
help="Print the final service configuration and exit without starting the server",
default=False,
)
@add_experimental_docstring
def start(
ctx: click.Context,
bento: str,
service_name: str,
dry_run: bool,
depends: Optional[list[str]],
runner_map: Optional[str],
bind: Optional[str],
port: Optional[int],
host: Optional[str],
file: str | None,
backlog: Optional[int],
working_dir: Optional[str],
api_workers: Optional[int],
......@@ -162,11 +185,44 @@ def build_start_command() -> click.Group:
reload: bool = False,
) -> None:
"""
Start a HTTP API server standalone. This will be used inside Yatai.
Start a single Dynamo service. This will be used inside Yatai.
"""
from bentoml import Service
from bentoml._internal.service.loader import load
service_configs: dict[str, dict[str, t.Any]] = {}
# Load file if provided
if file:
with open(file) as f:
yaml_configs = yaml.safe_load(f)
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
# Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
if dry_run:
rich.print("[bold]Service Configuration:[/bold]")
rich.print(json.dumps(service_configs, indent=2))
rich.print("\n[bold]Environment Variable that would be set:[/bold]")
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
sys.exit(0)
# Set environment variable with service configuration
if service_configs:
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
if working_dir is None:
if os.path.isdir(os.path.expanduser(bento)):
working_dir = os.path.expanduser(bento)
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing as t
import click
from click import Command, Context
class DynamoCommandGroup(click.Group):
    """Simplified version of BentoMLCommandGroup for Dynamo CLI.

    Adds alias support on top of ``click.Group``: commands may declare an
    ``aliases`` attribute, and ``get_command`` resolves those aliases back to
    the canonical command name.
    """

    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
        # Aliases for this group itself; popped before click sees the kwargs.
        self.aliases = kwargs.pop("aliases", [])
        super().__init__(*args, **kwargs)
        # canonical command name -> list of its aliases
        self._commands: dict[str, list[str]] = {}
        # alias -> canonical command name
        self._aliases: dict[str, str] = {}

    def add_command(self, cmd: Command, name: str | None = None) -> None:
        """Register *cmd*, recording any aliases it declares.

        Fix: dropped the vestigial ``assert cmd.callback is not None`` (and the
        no-op callback reassignment) left over from BentoMLCommandGroup's
        wrapper logic; it rejected callback-less commands such as sub-groups.
        """
        cmd.context_settings["max_content_width"] = 120
        aliases = getattr(cmd, "aliases", None)
        if aliases:
            assert cmd.name
            self._commands[cmd.name] = aliases
            self._aliases.update({alias: cmd.name for alias in aliases})
        return super().add_command(cmd, name)

    def add_subcommands(self, group: click.Group) -> None:
        """Merge every command of *group* into this group."""
        # NOTE: click.MultiCommand is deprecated in click >= 8.1 (alias of
        # click.Group); kept for compatibility with the pinned click version.
        if not isinstance(group, click.MultiCommand):
            raise TypeError(
                "DynamoCommandGroup.add_subcommands only accepts click.MultiCommand"
            )
        if isinstance(group, DynamoCommandGroup):
            # Common wrappers are already applied, call the super() method
            for name, cmd in group.commands.items():
                super().add_command(cmd, name)
            self._commands.update(group._commands)
            self._aliases.update(group._aliases)
        else:
            for name, cmd in group.commands.items():
                self.add_command(cmd, name)

    def resolve_alias(self, cmd_name: str) -> str:
        """Return the canonical name for *cmd_name* (identity if not an alias)."""
        return self._aliases.get(cmd_name, cmd_name)

    def get_command(self, ctx: Context, cmd_name: str) -> Command | None:
        # Resolve aliases before delegating the lookup to click.
        cmd_name = self.resolve_alias(cmd_name)
        return super().get_command(ctx, cmd_name)

    def add_single_command(self, group: click.Group, command_name: str) -> None:
        """Add a single command from a group by name.

        Raises:
            TypeError: if *group* is not a multi-command.
            ValueError: if *command_name* is not found in *group*.
        """
        if not isinstance(group, click.MultiCommand):
            raise TypeError("Only accepts click.MultiCommand")
        ctx = click.Context(group)
        cmd = group.get_command(ctx, command_name)
        if cmd is None:
            raise ValueError(f"Command '{command_name}' not found in group")
        self.add_command(cmd, command_name)
......@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]):
...
await dep.get_endpoint("generate") # equivalent to the following
router_client = (
await runtime.namespace("dynamo-init")
await runtime.namespace("dynamo")
.component("router")
.endpoint("generate")
.client()
......
......@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar
## Deployment Architectures
### Monolith
### Aggregated
Single-instance deployment where both prefill and decode are done by the same worker.
### Disaggregated
......@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy:
### Example architectures
#### Router based worker
#### Aggregated serving
```bash
cd /workspace/deploy/examples/llm
dynamo serve monolith.router_based_deployment:Frontend -f ./configs/monolith/router_based_deployment.yaml
dynamo serve graphs.agg:Frontend -f ./configs/agg.yaml
```
#### Routerless monolith
#### Aggregated serving with KV Routing
```bash
cd /workspace/deploy/examples/llm
dynamo serve monolith.routerless_deployment:Frontend -f ./configs/monolith/routerless_deployment.yaml
dynamo serve graphs.agg_router:Frontend -f ./configs/agg_router.yaml
```
#### Routerless processor based monolith
```bash
dynamo serve monolith.routerless_processor_deployment:Frontend -f ./configs/monolith/routerless_processor_deployment.yaml
```
#### Router based disaggregated serving
#### Disaggregated serving
```bash
cd /workspace/deploy/examples/llm
dynamo serve disaggregated.router_based_deployment:Frontend -f ./configs/disaggregated/router_based_deployment.yaml
dynamo serve graphs.disagg:Frontend -f ./configs/disagg.yaml
```
#### Routerless disaggregated serving
#### Disaggregated serving with KV Routing
```bash
cd /workspace/deploy/examples/llm
dynamo serve disaggregated.routerless_deployment:Frontend -f ./configs/disaggregated/routerless_deployment.yaml
dynamo serve graphs.disagg_router:Frontend -f ./configs/disagg_router.yaml
```
### Client
......
......@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace:
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class Router:
async def async_init(self):
self.runtime = dynamo_context["runtime"]
self.workers_client = (
await self.runtime.namespace("dynamo-init")
await self.runtime.namespace("dynamo")
.component("VllmWorker")
.endpoint("generate")
.client()
......@@ -101,7 +101,7 @@ class Router:
)
await asyncio.sleep(2)
kv_listener = self.runtime.namespace("dynamo-init").component("VllmWorker")
kv_listener = self.runtime.namespace("dynamo").component("VllmWorker")
await kv_listener.create_service()
self.indexer = KvIndexer(kv_listener, self.args.block_size)
self.metrics_aggregator = KvMetricsAggregator(kv_listener)
......
......@@ -44,7 +44,7 @@ class RequestType(BaseModel):
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -89,7 +89,7 @@ class PrefillWorker:
raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime)
self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata)
task = asyncio.create_task(self.prefill_queue_handler())
task.add_done_callback(lambda _: print("prefill queue handler created"))
......
......@@ -41,7 +41,7 @@ class RequestType(Enum):
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......
......@@ -37,7 +37,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess:
raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime)
self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata)
@dynamo_endpoint()
......
......@@ -41,7 +41,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class VllmWorkerRouterLess:
runtime = dynamo_context["runtime"]
if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime)
metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata)
models = OpenAIServingModels(
......
......@@ -44,7 +44,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class VllmWorker:
if self.engine_args.router == "kv":
VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id()
os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID)
os.environ["VLLM_KV_NAMESPACE"] = "dynamo-init"
os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
os.environ["VLLM_KV_COMPONENT"] = class_name
vllm_logger.info(f"Generate endpoint ID: {VLLM_WORKER_ID}")
# note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based
......@@ -131,7 +131,7 @@ class VllmWorker:
if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime)
metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata)
if self.engine_args.conditional_disagg:
......
......@@ -15,14 +15,14 @@
Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions
endpoint: dynamo.Processor.chat/completions
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: random
router: round-robin
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......
......@@ -15,7 +15,7 @@
Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions
endpoint: dynamo.Processor.chat/completions
port: 8000
Processor:
......@@ -39,6 +39,4 @@ VllmWorker:
router: kv
tensor-parallel-size: 1
ServiceArgs:
workers: 2
envs:
- CUDA_VISIBLE_DEVICES: '0,1'
workers: 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment