Commit df51a622 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: refactor examples and clean CLI (#195)

parent 0517f757
...@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command: ...@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command:
from bentoml._internal.configuration import BENTOML_VERSION from bentoml._internal.configuration import BENTOML_VERSION
from bentoml._internal.context import server_context from bentoml._internal.context import server_context
from bentoml_cli.bentos import bento_command from bentoml_cli.bentos import bento_command
from bentoml_cli.cloud import cloud_command
from bentoml_cli.containerize import containerize_command from bentoml_cli.containerize import containerize_command
from bentoml_cli.deployment import deployment_command, develop_command from bentoml_cli.utils import get_entry_points
from bentoml_cli.env import env_command
from bentoml_cli.models import model_command
from bentoml_cli.secret import secret_command
from bentoml_cli.utils import BentoMLCommandGroup, get_entry_points
from dynamo.sdk.cli.delete import delete_command
from dynamo.sdk.cli.deploy import deploy_command from dynamo.sdk.cli.deploy import deploy_command
from dynamo.sdk.cli.list import list_command
from dynamo.sdk.cli.run import run_command from dynamo.sdk.cli.run import run_command
from dynamo.sdk.cli.serve import serve_command from dynamo.sdk.cli.serve import serve_command
from dynamo.sdk.cli.server import cloud_command
from dynamo.sdk.cli.start import start_command from dynamo.sdk.cli.start import start_command
from dynamo.sdk.cli.utils import DynamoCommandGroup
server_context.service_type = "cli" server_context.service_type = "cli"
CONTEXT_SETTINGS = {"help_option_names": ("-h", "--help")} CONTEXT_SETTINGS = {"help_option_names": ("-h", "--help")}
@click.group(cls=BentoMLCommandGroup, context_settings=CONTEXT_SETTINGS) @click.group(cls=DynamoCommandGroup, context_settings=CONTEXT_SETTINGS)
@click.version_option(BENTOML_VERSION, "-v", "--version") @click.version_option(BENTOML_VERSION, "-v", "--version")
def bentoml_cli(): # TODO: to be renamed to something.... def bentoml_cli(): # TODO: to be renamed to something....
""" """ """
The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
It takes inspiration from and leverages core pieces of the BentoML deployment stack.
At a high level, you use `serve` to run a set of dynamo services locally,
`build` and `containerize` to package them up for deployment, and then `server`
and `deploy` to deploy them to a K8s cluster running the Dynamo Server
"""
# Add top-level CLI commands # Add top-level CLI commands
bentoml_cli.add_command(env_command)
bentoml_cli.add_command(cloud_command) bentoml_cli.add_command(cloud_command)
bentoml_cli.add_command(model_command) bentoml_cli.add_single_command(bento_command, "build")
bentoml_cli.add_subcommands(bento_command)
bentoml_cli.add_subcommands(start_command) bentoml_cli.add_subcommands(start_command)
bentoml_cli.add_subcommands(serve_command) bentoml_cli.add_subcommands(serve_command)
bentoml_cli.add_subcommands(run_command) bentoml_cli.add_subcommands(run_command)
bentoml_cli.add_command(containerize_command) bentoml_cli.add_command(containerize_command)
bentoml_cli.add_command(deploy_command) bentoml_cli.add_command(deploy_command)
bentoml_cli.add_command(develop_command)
bentoml_cli.add_command(deployment_command)
bentoml_cli.add_command(secret_command)
bentoml_cli.add_command(list_command)
bentoml_cli.add_command(delete_command)
# Load commands from extensions # Load commands from extensions
for ep in get_entry_points("bentoml.commands"): for ep in get_entry_points("bentoml.commands"):
bentoml_cli.add_command(ep.load()) bentoml_cli.add_command(ep.load())
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
import bentoml
import click
from bentoml._internal.cloud.base import Spinner
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich.console import Console
logger = logging.getLogger(__name__)
def build_delete_command() -> click.Group:
@click.group(name="delete", cls=BentoMLCommandGroup)
def cli():
"""Delete resources"""
pass
@cli.command(name="bentos")
@click.argument("bento_tag", type=click.STRING, required=False)
@click.option(
"--all",
is_flag=True,
default=False,
help="Delete all bentos in local store",
)
@click.option(
"--force",
is_flag=True,
default=False,
help="Skip confirmation prompt",
)
def delete_bentos(
bento_tag: str | None = None,
all: bool = False,
force: bool = False,
):
"""
Delete bentos from local store
Args:
bento_tag: Tag of the bento to delete
all: Delete all bentos
force: Skip confirmation prompt
"""
console = Console(highlight=False)
# Validate arguments
if not bento_tag and not all:
raise click.ClickException(
"Either specify a bento tag or use --all to delete all bentos"
)
if bento_tag and all:
raise click.ClickException("Cannot specify both a bento tag and --all flag")
with Spinner(console=console) as spinner:
try:
# Get bentos to delete
bentos_to_delete = []
if all:
# Get all bentos
spinner.update("Fetching all bentos")
bentos = bentoml.list()
bentos_to_delete = [str(bento.tag) for bento in bentos]
if not bentos_to_delete:
spinner.log("No bentos found in local store")
return
else:
# Check if the specified bento exists
if bento_tag is not None:
bentos_to_delete = [bento_tag]
else:
# This should never happen due to earlier validation, but handle it anyway
spinner.log("[bold red]No bento tag specified[/]")
return
# Confirm deletion if not forced
if not force:
spinner.stop()
if all:
message = f"Are you sure you want to delete all {len(bentos_to_delete)} bentos from local store?"
else:
message = f"Are you sure you want to delete bento '{bento_tag}' from local store?"
if not click.confirm(message):
console.print("[yellow]Deletion cancelled[/]")
return
spinner.start()
# Delete bentos
for tag in bentos_to_delete:
spinner.update(f"Deleting bento '{tag}'")
try:
bentoml.delete(tag)
spinner.log(f"[green]Successfully deleted bento '{tag}'[/]")
except Exception as e:
spinner.log(
f"[bold red]Failed to delete bento '{tag}': {str(e)}[/]"
)
logger.error(f"Failed to delete bento '{tag}'", exc_info=True)
# Final summary
if all:
spinner.log(
f"[bold green]Deleted {len(bentos_to_delete)} bentos from local store[/]"
)
except Exception as e:
logger.error("Deletion operation failed", exc_info=True)
spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
raise SystemExit(1)
@cli.command(name="deployments")
@click.argument("deployment_name", type=click.STRING, required=False)
@click.option(
"--namespace",
type=click.STRING,
default="default",
help="Kubernetes namespace containing the deployments",
)
@click.option(
"--all",
is_flag=True,
default=False,
help="Delete all deployments in the namespace",
)
@click.option(
"--force",
is_flag=True,
default=False,
help="Skip confirmation prompt",
)
def delete_deployments(
deployment_name: str | None = None,
namespace: str = "default",
all: bool = False,
force: bool = False,
):
"""
Delete deployments from a Kubernetes namespace
Args:
deployment_name: Name of the deployment to delete
namespace: Kubernetes namespace containing the deployments
all: Delete all deployments in the namespace
force: Skip confirmation prompt
"""
console = Console(highlight=False)
# Validate arguments
if not deployment_name and not all:
raise click.ClickException(
"Either specify a deployment name or use --all to delete all deployments"
)
if deployment_name and all:
raise click.ClickException(
"Cannot specify both a deployment name and --all flag"
)
# Load Kubernetes configuration
try:
config.load_kube_config()
api = client.CustomObjectsApi()
except Exception as e:
logger.error("Failed to load Kubernetes configuration", exc_info=True)
raise click.ClickException(
f"Failed to load Kubernetes configuration: {str(e)}"
)
# Define the group, version, and plural for the CRD
group = "nvidia.com"
version = "v1alpha1"
plural = "dynamodeployments"
with Spinner(console=console) as spinner:
try:
# Get deployments to delete
deployments_to_delete = []
if all:
# Get all deployments in the namespace
spinner.update(
f"Fetching all deployments in namespace '{namespace}'"
)
deployments = api.list_namespaced_custom_object(
group=group,
version=version,
namespace=namespace,
plural=plural,
)
deployments_to_delete = [
item["metadata"]["name"]
for item in deployments.get("items", [])
]
if not deployments_to_delete:
spinner.log(f"No deployments found in namespace '{namespace}'")
return
else:
# Check if the specified deployment exists
try:
api.get_namespaced_custom_object(
group=group,
version=version,
namespace=namespace,
plural=plural,
name=deployment_name,
)
deployments_to_delete = [deployment_name]
except client.rest.ApiException as e:
if e.status == 404:
spinner.log(
f"[bold red]Deployment '{deployment_name}' not found in namespace '{namespace}'[/]"
)
return
raise
# Confirm deletion if not forced
if not force:
spinner.stop()
if all:
message = f"Are you sure you want to delete all {len(deployments_to_delete)} deployments in namespace '{namespace}'?"
else:
message = f"Are you sure you want to delete deployment '{deployment_name}' in namespace '{namespace}'?"
if not click.confirm(message):
console.print("[yellow]Deletion cancelled[/]")
return
spinner.start()
# Delete deployments
for name in deployments_to_delete:
spinner.update(
f"Deleting deployment '{name}' in namespace '{namespace}'"
)
try:
api.delete_namespaced_custom_object(
group=group,
version=version,
namespace=namespace,
plural=plural,
name=name,
)
spinner.log(
f"[green]Successfully deleted deployment '{name}'[/]"
)
except client.rest.ApiException as e:
if e.status == 404:
spinner.log(
f"[yellow]Deployment '{name}' not found or already deleted[/]"
)
else:
spinner.log(
f"[bold red]Failed to delete deployment '{name}': {str(e)}[/]"
)
logger.error(
f"Failed to delete deployment '{name}'", exc_info=True
)
# Final summary
if all:
spinner.log(
f"[bold green]Deleted {len(deployments_to_delete)} deployments from namespace '{namespace}'[/]"
)
except Exception as e:
logger.error("Deletion operation failed", exc_info=True)
spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
raise SystemExit(1)
return cli
delete_command = build_delete_command()
...@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) ...@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
@click.group(name="deploy") @click.group(name="deploy")
def deploy_command_group(): def deploy_command_group():
"""Deploy 🍱 to a cluster""" """Deploy to a cluster"""
pass pass
...@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N ...@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N
def build_deploy_command() -> click.Command: def build_deploy_command() -> click.Command:
from bentoml._internal.utils import add_experimental_docstring
@click.command(name="deploy") @click.command(name="deploy")
@click.argument("bento", type=click.STRING, default=".") @click.argument("bento", type=click.STRING, default=".")
@click.option("-n", "--name", type=click.STRING, help="Deployment name") @click.option("-n", "--name", type=click.STRING, help="Deployment name")
...@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command: ...@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command:
) )
@click.option("--strategy", type=click.STRING, default="rolling-update") @click.option("--strategy", type=click.STRING, default="rolling-update")
@click.option("--version", type=click.STRING, help="Version tag for the Bento") @click.option("--version", type=click.STRING, help="Version tag for the Bento")
@add_experimental_docstring
def deploy_command( def deploy_command(
bento: str | None, bento: str | None,
name: str | None, name: str | None,
...@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command: ...@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command:
version: str | None = None, version: str | None = None,
): ):
""" """
Deploy 🍱 to a cluster Deploy a set of Dynamo services in a Bento to a K8s cluster
\b \b
BENTO is the serving target, it can be: BENTO is the serving target, it can be:
- a tag to a Bento in local Bento store - a tag to a Bento in local Bento store
- a folder containing a valid 'bentofile.yaml'
- a path to a built Bento - a path to a built Bento
""" """
from bentoml._internal.log import configure_server_logging from bentoml._internal.log import configure_server_logging
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
import os
import typing as t
import bentoml
import click
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich import print as rich_print
from rich.table import Table
def build_list_command() -> click.Group:
@click.group(name="list", cls=BentoMLCommandGroup)
def cli():
"""List resources"""
pass
@cli.command(name="bentos")
def list_bentos():
"""List all bentos in local store"""
bentos = bentoml.list()
table = Table(box=None, expand=True)
table.add_column("Tag", overflow="fold")
table.add_column("Service", overflow="fold")
table.add_column("Created At", overflow="fold")
for bento in bentos:
table.add_row(
str(bento.tag),
bento.info.service,
bento.info.creation_time.strftime("%Y-%m-%d %H:%M:%S"),
)
rich_print(table)
@cli.command(name="deployments")
@click.option("--namespace", type=click.STRING, help="Kubernetes namespace")
@click.option("--cluster", type=click.STRING, help="Cluster name")
@click.option(
"-o",
"--output",
help="Display the output of this command.",
type=click.Choice(["json", "table"]),
default="table",
)
def list_deployments(
namespace: str | None = None,
cluster: str | None = None,
output: t.Literal["json", "table"] = "table",
):
"""List deployments"""
config.load_kube_config()
api = client.CustomObjectsApi()
# Define the group, version, and plural for the CRD
group = "nvidia.com"
version = "v1alpha1"
plural = "dynamodeployments"
# Get the deployments from the Kubernetes API
deployments = api.list_namespaced_custom_object(
group=group,
version=version,
namespace=namespace,
plural=plural,
)
if output == "json":
rich_print(json.dumps(deployments, indent=2))
return
# Create table for output
table = Table(box=None, expand=True)
table.add_column("Name", overflow="fold")
table.add_column("Namespace", overflow="fold")
table.add_column("Status", overflow="fold")
table.add_column("Created At", overflow="fold")
table.add_column("Replicas", overflow="fold")
table.add_column("Resources", overflow="fold")
table.add_column("URL", overflow="fold")
ingress_suffix = os.getenv("DYNAMO_INGRESS_SUFFIX", "local")
for item in deployments.get("items", []):
metadata = item.get("metadata", {})
spec = item.get("spec", {})
services = spec.get("services", {}).get("main", {}).get("spec", {})
resources = services.get("resources", {})
ingress = services.get("ingress", {})
# Format resources
resources_str = (
f"CPU: {resources.get('requests', {}).get('cpu', 'N/A')} / {resources.get('limits', {}).get('cpu', 'N/A')}\n"
f"Memory: {resources.get('requests', {}).get('memory', 'N/A')} / {resources.get('limits', {}).get('memory', 'N/A')}\n"
f"GPU: {resources.get('requests', {}).get('gpu', 'N/A')} / {resources.get('limits', {}).get('gpu', 'N/A')}"
)
# Format URL
url = (
f"https://{ingress.get('hostPrefix', 'N/A')}.{ingress_suffix}"
if ingress.get("enabled", False)
else "N/A"
)
table.add_row(
metadata.get("name", "N/A"),
metadata.get("namespace", "N/A"),
item.get("status", {}).get("state", "Unknown"),
metadata.get("creationTimestamp", "N/A"),
f"{services.get('autoscaling', {}).get('minReplicas', 'N/A')} - {services.get('autoscaling', {}).get('maxReplicas', 'N/A')}",
resources_str,
url,
)
rich_print(table)
return cli
list_command = build_list_command()
...@@ -23,9 +23,7 @@ import click ...@@ -23,9 +23,7 @@ import click
def build_run_command() -> click.Group: def build_run_command() -> click.Group:
from bentoml_cli.utils import BentoMLCommandGroup @click.group(name="run")
@click.group(name="run", cls=BentoMLCommandGroup)
def cli(): def cli():
pass pass
......
...@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]: ...@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
def build_serve_command() -> click.Group: def build_serve_command() -> click.Group:
from bentoml._internal.log import configure_server_logging from bentoml._internal.log import configure_server_logging
from bentoml_cli.env_manager import env_manager from bentoml_cli.env_manager import env_manager
from bentoml_cli.utils import AliasCommand, BentoMLCommandGroup from bentoml_cli.utils import AliasCommand
@click.group(name="serve", cls=BentoMLCommandGroup) @click.group(name="serve")
def cli(): def cli():
pass pass
...@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group: ...@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group:
timeout_graceful_shutdown: int | None, timeout_graceful_shutdown: int | None,
**attrs: t.Any, **attrs: t.Any,
) -> None: ) -> None:
"""Start a HTTP BentoServer from a given 🍱 """Locally run connected Dynamo services
\b \b
You can also pass service-specific configuration options using --ServiceName.param=value format. You can also pass service-specific configuration options using --ServiceName.param=value format.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import sys
import click
import rich
from bentoml._internal.cloud.client import RestApiClient
from bentoml._internal.cloud.config import (
DEFAULT_ENDPOINT,
CloudClientConfig,
CloudClientContext,
)
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.utils.cattr import bentoml_cattr
from bentoml.exceptions import CLIException, CloudRESTApiClientError
@click.group(name="server")
def cloud_command():
"""Interact with your Dynamo Server"""
@cloud_command.command()
@click.option(
"--endpoint",
type=click.STRING,
help="Dynamo Server endpoint",
default=DEFAULT_ENDPOINT,
envvar="DYNAMO_SERVER_API_ENDPOINT",
show_default=True,
show_envvar=True,
required=True,
)
@click.option(
"--api-token",
type=click.STRING,
help="Dynamo Server user API token",
envvar="DYNAMO_SERVER_API_KEY",
show_envvar=True,
required=True,
)
def login(endpoint: str, api_token: str) -> None: # type: ignore
"""Connect to your Dynamo Server. You can find deployment instructions for this in our docs"""
try:
cloud_rest_client = RestApiClient(endpoint, api_token)
user = cloud_rest_client.v1.get_current_user()
if user is None:
raise CLIException("current user is not found")
org = cloud_rest_client.v1.get_current_organization()
if org is None:
raise CLIException("current organization is not found")
current_context_name = CloudClientConfig.get_config().current_context_name
cloud_context = BentoMLContainer.cloud_context.get()
ctx = CloudClientContext(
name=cloud_context if cloud_context is not None else current_context_name,
endpoint=endpoint,
api_token=api_token,
email=user.email,
)
ctx.save()
rich.print(
f":white_check_mark: Configured BentoCloud credentials (current-context: {ctx.name})"
)
rich.print(
f":white_check_mark: Logged in as [blue]{user.email}[/] at [blue]{org.name}[/] organization"
)
except CloudRESTApiClientError as e:
if e.error_code == 401:
rich.print(
f":police_car_light: Error validating token: HTTP 401: Bad credentials ({endpoint}/api-token)",
file=sys.stderr,
)
else:
rich.print(
f":police_car_light: Error validating token: HTTP {e.error_code}",
file=sys.stderr,
)
@cloud_command.command()
def current_context() -> None: # type: ignore
"""Get current cloud context."""
rich.print_json(
data=bentoml_cattr.unstructure(CloudClientConfig.get_config().get_context())
)
@cloud_command.command()
def list_context() -> None: # type: ignore
"""List all available context."""
config = CloudClientConfig.get_config()
rich.print_json(data=bentoml_cattr.unstructure([i.name for i in config.contexts]))
@cloud_command.command()
@click.argument("context_name", type=click.STRING)
def update_current_context(context_name: str) -> None: # type: ignore
"""Update current context"""
ctx = CloudClientConfig.get_config().set_current_context(context_name)
rich.print(f"Successfully switched to context: {ctx.name}")
...@@ -484,7 +484,7 @@ def serve_http( ...@@ -484,7 +484,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component") hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component() and svc.is_dynamo_component()
) )
else 'Starting production %s BentoServer from "%s" (Press CTRL+C to quit)' else "Starting %s (Press CTRL+C to quit)"
), ),
*( *(
(svc.name, *svc.dynamo_address(), scheme, log_host, port) (svc.name, *svc.dynamo_address(), scheme, log_host, port)
...@@ -492,7 +492,7 @@ def serve_http( ...@@ -492,7 +492,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component") hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component() and svc.is_dynamo_component()
) )
else (scheme.upper(), bento_identifier) else (bento_identifier,)
), ),
), ),
) )
......
...@@ -19,24 +19,32 @@ import json ...@@ -19,24 +19,32 @@ import json
import logging import logging
import os import os
import sys import sys
import typing as t
from typing import Optional from typing import Optional
from urllib.parse import urlparse from urllib.parse import urlparse
import click import click
import rich import rich
import yaml
from dynamo.sdk.cli.serve import _parse_service_args
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def build_start_command() -> click.Group: def build_start_command() -> click.Group:
from bentoml._internal.utils import add_experimental_docstring from bentoml._internal.utils import add_experimental_docstring
from bentoml_cli.utils import BentoMLCommandGroup
@click.group(name="start", cls=BentoMLCommandGroup) @click.group(name="start")
def cli(): def cli():
pass pass
@cli.command() @cli.command(
context_settings=dict(
ignore_unknown_options=True,
allow_extra_args=True,
),
)
@click.argument("bento", type=click.STRING, default=".") @click.argument("bento", type=click.STRING, default=".")
@click.option( @click.option(
"--service-name", "--service-name",
...@@ -46,6 +54,12 @@ def build_start_command() -> click.Group: ...@@ -46,6 +54,12 @@ def build_start_command() -> click.Group:
envvar="BENTOML_SERVE_SERVICE_NAME", envvar="BENTOML_SERVE_SERVICE_NAME",
help="specify the runner name to serve", help="specify the runner name to serve",
) )
@click.option(
"-f",
"--file",
type=click.Path(exists=True),
help="Path to YAML config file for service configuration",
)
@click.option( @click.option(
"--depends", "--depends",
type=click.STRING, type=click.STRING,
...@@ -137,15 +151,24 @@ def build_start_command() -> click.Group: ...@@ -137,15 +151,24 @@ def build_start_command() -> click.Group:
help="Reload Service when code changes detected", help="Reload Service when code changes detected",
default=False, default=False,
) )
@click.option(
"--dry-run",
is_flag=True,
help="Print the final service configuration and exit without starting the server",
default=False,
)
@add_experimental_docstring @add_experimental_docstring
def start( def start(
ctx: click.Context,
bento: str, bento: str,
service_name: str, service_name: str,
dry_run: bool,
depends: Optional[list[str]], depends: Optional[list[str]],
runner_map: Optional[str], runner_map: Optional[str],
bind: Optional[str], bind: Optional[str],
port: Optional[int], port: Optional[int],
host: Optional[str], host: Optional[str],
file: str | None,
backlog: Optional[int], backlog: Optional[int],
working_dir: Optional[str], working_dir: Optional[str],
api_workers: Optional[int], api_workers: Optional[int],
...@@ -162,11 +185,44 @@ def build_start_command() -> click.Group: ...@@ -162,11 +185,44 @@ def build_start_command() -> click.Group:
reload: bool = False, reload: bool = False,
) -> None: ) -> None:
""" """
Start a HTTP API server standalone. This will be used inside Yatai. Start a single Dynamo service. This will be used inside Yatai.
""" """
from bentoml import Service from bentoml import Service
from bentoml._internal.service.loader import load from bentoml._internal.service.loader import load
service_configs: dict[str, dict[str, t.Any]] = {}
# Load file if provided
if file:
with open(file) as f:
yaml_configs = yaml.safe_load(f)
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
# Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
if dry_run:
rich.print("[bold]Service Configuration:[/bold]")
rich.print(json.dumps(service_configs, indent=2))
rich.print("\n[bold]Environment Variable that would be set:[/bold]")
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
sys.exit(0)
# Set environment variable with service configuration
if service_configs:
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
if working_dir is None: if working_dir is None:
if os.path.isdir(os.path.expanduser(bento)): if os.path.isdir(os.path.expanduser(bento)):
working_dir = os.path.expanduser(bento) working_dir = os.path.expanduser(bento)
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing as t
import click
from click import Command, Context
class DynamoCommandGroup(click.Group):
"""Simplified version of BentoMLCommandGroup for Dynamo CLI"""
def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
self.aliases = kwargs.pop("aliases", [])
super().__init__(*args, **kwargs)
self._commands: dict[str, list[str]] = {}
self._aliases: dict[str, str] = {}
def add_command(self, cmd: Command, name: str | None = None) -> None:
assert cmd.callback is not None
callback = cmd.callback
cmd.callback = callback
cmd.context_settings["max_content_width"] = 120
aliases = getattr(cmd, "aliases", None)
if aliases:
assert cmd.name
self._commands[cmd.name] = aliases
self._aliases.update({alias: cmd.name for alias in aliases})
return super().add_command(cmd, name)
def add_subcommands(self, group: click.Group) -> None:
if not isinstance(group, click.MultiCommand):
raise TypeError(
"DynamoCommandGroup.add_subcommands only accepts click.MultiCommand"
)
if isinstance(group, DynamoCommandGroup):
# Common wrappers are already applied, call the super() method
for name, cmd in group.commands.items():
super().add_command(cmd, name)
self._commands.update(group._commands)
self._aliases.update(group._aliases)
else:
for name, cmd in group.commands.items():
self.add_command(cmd, name)
def resolve_alias(self, cmd_name: str):
return self._aliases[cmd_name] if cmd_name in self._aliases else cmd_name
def get_command(self, ctx: Context, cmd_name: str) -> Command | None:
cmd_name = self.resolve_alias(cmd_name)
return super().get_command(ctx, cmd_name)
def add_single_command(self, group: click.Group, command_name: str) -> None:
"""Add a single command from a group by name."""
if not isinstance(group, click.MultiCommand):
raise TypeError("Only accepts click.MultiCommand")
ctx = click.Context(group)
cmd = group.get_command(ctx, command_name)
if cmd is None:
raise ValueError(f"Command '{command_name}' not found in group")
self.add_command(cmd, command_name)
...@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]): ...@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]):
... ...
await dep.get_endpoint("generate") # equivalent to the following await dep.get_endpoint("generate") # equivalent to the following
router_client = ( router_client = (
await runtime.namespace("dynamo-init") await runtime.namespace("dynamo")
.component("router") .component("router")
.endpoint("generate") .endpoint("generate")
.client() .client()
......
...@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar ...@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar
## Deployment Architectures ## Deployment Architectures
### Monolith ### Aggregated
Single-instance deployment where both prefill and decode are done by the same worker. Single-instance deployment where both prefill and decode are done by the same worker.
### Disaggregated ### Disaggregated
...@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy: ...@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy:
### Example architectures ### Example architectures
#### Router based worker #### Aggregated serving
```bash ```bash
cd /workspace/deploy/examples/llm cd /workspace/deploy/examples/llm
dynamo serve monolith.router_based_deployment:Frontend -f ./configs/monolith/router_based_deployment.yaml dynamo serve graphs.agg:Frontend -f ./configs/agg.yaml
``` ```
#### Routerless monolith #### Aggregated serving with KV Routing
```bash ```bash
cd /workspace/deploy/examples/llm cd /workspace/deploy/examples/llm
dynamo serve monolith.routerless_deployment:Frontend -f ./configs/monolith/routerless_deployment.yaml dynamo serve graphs.agg_router:Frontend -f ./configs/agg_router.yaml
``` ```
#### Routerless processor based monolith #### Disaggregated serving
```bash
dynamo serve monolith.routerless_processor_deployment:Frontend -f ./configs/monolith/routerless_processor_deployment.yaml
```
#### Router based disaggregated serving
```bash ```bash
cd /workspace/deploy/examples/llm cd /workspace/deploy/examples/llm
dynamo serve disaggregated.router_based_deployment:Frontend -f ./configs/disaggregated/router_based_deployment.yaml dynamo serve graphs.disagg:Frontend -f ./configs/disagg.yaml
``` ```
#### Routerless disaggregated serving #### Disaggregated serving with KV Routing
```bash ```bash
cd /workspace/deploy/examples/llm cd /workspace/deploy/examples/llm
dynamo serve disaggregated.routerless_deployment:Frontend -f ./configs/disaggregated/routerless_deployment.yaml dynamo serve graphs.disagg_router:Frontend -f ./configs/disagg_router.yaml
``` ```
### Client ### Client
......
...@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace: ...@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace:
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"cpu": "10", "memory": "20Gi"}, resources={"cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
...@@ -87,7 +87,7 @@ class Router: ...@@ -87,7 +87,7 @@ class Router:
async def async_init(self): async def async_init(self):
self.runtime = dynamo_context["runtime"] self.runtime = dynamo_context["runtime"]
self.workers_client = ( self.workers_client = (
await self.runtime.namespace("dynamo-init") await self.runtime.namespace("dynamo")
.component("VllmWorker") .component("VllmWorker")
.endpoint("generate") .endpoint("generate")
.client() .client()
...@@ -101,7 +101,7 @@ class Router: ...@@ -101,7 +101,7 @@ class Router:
) )
await asyncio.sleep(2) await asyncio.sleep(2)
kv_listener = self.runtime.namespace("dynamo-init").component("VllmWorker") kv_listener = self.runtime.namespace("dynamo").component("VllmWorker")
await kv_listener.create_service() await kv_listener.create_service()
self.indexer = KvIndexer(kv_listener, self.args.block_size) self.indexer = KvIndexer(kv_listener, self.args.block_size)
self.metrics_aggregator = KvMetricsAggregator(kv_listener) self.metrics_aggregator = KvMetricsAggregator(kv_listener)
......
...@@ -44,7 +44,7 @@ class RequestType(BaseModel): ...@@ -44,7 +44,7 @@ class RequestType(BaseModel):
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"}, resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
...@@ -89,7 +89,7 @@ class PrefillWorker: ...@@ -89,7 +89,7 @@ class PrefillWorker:
raise RuntimeError("Failed to initialize engine client") raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"] runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime) self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata) await self._metadata_store.put(metadata.engine_id, metadata)
task = asyncio.create_task(self.prefill_queue_handler()) task = asyncio.create_task(self.prefill_queue_handler())
task.add_done_callback(lambda _: print("prefill queue handler created")) task.add_done_callback(lambda _: print("prefill queue handler created"))
......
...@@ -41,7 +41,7 @@ class RequestType(Enum): ...@@ -41,7 +41,7 @@ class RequestType(Enum):
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"cpu": "10", "memory": "20Gi"}, resources={"cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
......
...@@ -37,7 +37,7 @@ from dynamo.sdk import ( ...@@ -37,7 +37,7 @@ from dynamo.sdk import (
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"cpu": "10", "memory": "20Gi"}, resources={"cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
...@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess: ...@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess:
raise RuntimeError("Failed to initialize engine client") raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"] runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime) self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata) await self._metadata_store.put(metadata.engine_id, metadata)
@dynamo_endpoint() @dynamo_endpoint()
......
...@@ -41,7 +41,7 @@ from dynamo.sdk import ( ...@@ -41,7 +41,7 @@ from dynamo.sdk import (
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"}, resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
...@@ -87,7 +87,7 @@ class VllmWorkerRouterLess: ...@@ -87,7 +87,7 @@ class VllmWorkerRouterLess:
runtime = dynamo_context["runtime"] runtime = dynamo_context["runtime"]
if self.engine_args.remote_prefill: if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime) metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata) await metadata_store.put(metadata.engine_id, metadata)
models = OpenAIServingModels( models = OpenAIServingModels(
......
...@@ -44,7 +44,7 @@ from dynamo.sdk import ( ...@@ -44,7 +44,7 @@ from dynamo.sdk import (
@service( @service(
dynamo={ dynamo={
"enabled": True, "enabled": True,
"namespace": "dynamo-init", "namespace": "dynamo",
}, },
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"}, resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
...@@ -87,7 +87,7 @@ class VllmWorker: ...@@ -87,7 +87,7 @@ class VllmWorker:
if self.engine_args.router == "kv": if self.engine_args.router == "kv":
VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id() VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id()
os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID) os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID)
os.environ["VLLM_KV_NAMESPACE"] = "dynamo-init" os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
os.environ["VLLM_KV_COMPONENT"] = class_name os.environ["VLLM_KV_COMPONENT"] = class_name
vllm_logger.info(f"Generate endpoint ID: {VLLM_WORKER_ID}") vllm_logger.info(f"Generate endpoint ID: {VLLM_WORKER_ID}")
# note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based # note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based
...@@ -131,7 +131,7 @@ class VllmWorker: ...@@ -131,7 +131,7 @@ class VllmWorker:
if self.engine_args.remote_prefill: if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime) metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata) await metadata_store.put(metadata.engine_id, metadata)
if self.engine_args.conditional_disagg: if self.engine_args.conditional_disagg:
......
...@@ -15,14 +15,14 @@ ...@@ -15,14 +15,14 @@
Frontend: Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions endpoint: dynamo.Processor.chat/completions
port: 8000 port: 8000
Processor: Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64 block-size: 64
max-model-len: 16384 max-model-len: 16384
router: random router: round-robin
VllmWorker: VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
Frontend: Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions endpoint: dynamo.Processor.chat/completions
port: 8000 port: 8000
Processor: Processor:
...@@ -39,6 +39,4 @@ VllmWorker: ...@@ -39,6 +39,4 @@ VllmWorker:
router: kv router: kv
tensor-parallel-size: 1 tensor-parallel-size: 1
ServiceArgs: ServiceArgs:
workers: 2 workers: 1
envs:
- CUDA_VISIBLE_DEVICES: '0,1'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment