"test/git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "30bf79f1ac709477909c875387b760c52cc9e5b6"
Commit df51a622 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: refactor examples and clean CLI (#195)

parent 0517f757
......@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command:
from bentoml._internal.configuration import BENTOML_VERSION
from bentoml._internal.context import server_context
from bentoml_cli.bentos import bento_command
from bentoml_cli.cloud import cloud_command
from bentoml_cli.containerize import containerize_command
from bentoml_cli.deployment import deployment_command, develop_command
from bentoml_cli.env import env_command
from bentoml_cli.models import model_command
from bentoml_cli.secret import secret_command
from bentoml_cli.utils import BentoMLCommandGroup, get_entry_points
from bentoml_cli.utils import get_entry_points
from dynamo.sdk.cli.delete import delete_command
from dynamo.sdk.cli.deploy import deploy_command
from dynamo.sdk.cli.list import list_command
from dynamo.sdk.cli.run import run_command
from dynamo.sdk.cli.serve import serve_command
from dynamo.sdk.cli.server import cloud_command
from dynamo.sdk.cli.start import start_command
from dynamo.sdk.cli.utils import DynamoCommandGroup
server_context.service_type = "cli"
CONTEXT_SETTINGS = {"help_option_names": ("-h", "--help")}
@click.group(cls=BentoMLCommandGroup, context_settings=CONTEXT_SETTINGS)
@click.group(cls=DynamoCommandGroup, context_settings=CONTEXT_SETTINGS)
@click.version_option(BENTOML_VERSION, "-v", "--version")
def bentoml_cli(): # TODO: to be renamed to something....
""" """
"""
The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
It takes inspiration from and leverages core pieces of the BentoML deployment stack.
At a high level, you use `serve` to run a set of dynamo services locally,
`build` and `containerize` to package them up for deployment, and then `server`
and `deploy` to deploy them to a K8s cluster running the Dynamo Server
"""
# Add top-level CLI commands
bentoml_cli.add_command(env_command)
bentoml_cli.add_command(cloud_command)
bentoml_cli.add_command(model_command)
bentoml_cli.add_subcommands(bento_command)
bentoml_cli.add_single_command(bento_command, "build")
bentoml_cli.add_subcommands(start_command)
bentoml_cli.add_subcommands(serve_command)
bentoml_cli.add_subcommands(run_command)
bentoml_cli.add_command(containerize_command)
bentoml_cli.add_command(deploy_command)
bentoml_cli.add_command(develop_command)
bentoml_cli.add_command(deployment_command)
bentoml_cli.add_command(secret_command)
bentoml_cli.add_command(list_command)
bentoml_cli.add_command(delete_command)
# Load commands from extensions
for ep in get_entry_points("bentoml.commands"):
bentoml_cli.add_command(ep.load())
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
import bentoml
import click
from bentoml._internal.cloud.base import Spinner
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich.console import Console
logger = logging.getLogger(__name__)
def build_delete_command() -> click.Group:
    """Build the ``delete`` CLI command group.

    Returns a click group with two subcommands:
      * ``bentos``       -- delete bentos from the local BentoML store.
      * ``deployments``  -- delete DynamoDeployment custom resources from a
                            Kubernetes namespace.
    """

    @click.group(name="delete", cls=BentoMLCommandGroup)
    def cli():
        """Delete resources"""
        pass

    @cli.command(name="bentos")
    @click.argument("bento_tag", type=click.STRING, required=False)
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all bentos in local store",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_bentos(
        bento_tag: str | None = None,
        all: bool = False,  # NOTE: shadows the builtin `all` inside this function
        force: bool = False,
    ):
        """
        Delete bentos from local store
        Args:
            bento_tag: Tag of the bento to delete
            all: Delete all bentos
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)
        # Validate arguments: exactly one of BENTO_TAG / --all must be given.
        if not bento_tag and not all:
            raise click.ClickException(
                "Either specify a bento tag or use --all to delete all bentos"
            )
        if bento_tag and all:
            raise click.ClickException("Cannot specify both a bento tag and --all flag")
        with Spinner(console=console) as spinner:
            try:
                # Get bentos to delete
                bentos_to_delete = []
                if all:
                    # Get all bentos
                    spinner.update("Fetching all bentos")
                    bentos = bentoml.list()
                    bentos_to_delete = [str(bento.tag) for bento in bentos]
                    if not bentos_to_delete:
                        spinner.log("No bentos found in local store")
                        return
                else:
                    # Check if the specified bento exists
                    if bento_tag is not None:
                        bentos_to_delete = [bento_tag]
                    else:
                        # This should never happen due to earlier validation, but handle it anyway
                        spinner.log("[bold red]No bento tag specified[/]")
                        return
                # Confirm deletion if not forced
                if not force:
                    # Pause the spinner so the confirmation prompt renders cleanly.
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(bentos_to_delete)} bentos from local store?"
                    else:
                        message = f"Are you sure you want to delete bento '{bento_tag}' from local store?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()
                # Delete bentos
                for tag in bentos_to_delete:
                    spinner.update(f"Deleting bento '{tag}'")
                    try:
                        bentoml.delete(tag)
                        spinner.log(f"[green]Successfully deleted bento '{tag}'[/]")
                    except Exception as e:
                        # Per-bento failures are logged but do not abort the batch.
                        spinner.log(
                            f"[bold red]Failed to delete bento '{tag}': {str(e)}[/]"
                        )
                        logger.error(f"Failed to delete bento '{tag}'", exc_info=True)
                # Final summary
                if all:
                    spinner.log(
                        f"[bold green]Deleted {len(bentos_to_delete)} bentos from local store[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
                raise SystemExit(1)

    @cli.command(name="deployments")
    @click.argument("deployment_name", type=click.STRING, required=False)
    @click.option(
        "--namespace",
        type=click.STRING,
        default="default",
        help="Kubernetes namespace containing the deployments",
    )
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all deployments in the namespace",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_deployments(
        deployment_name: str | None = None,
        namespace: str = "default",
        all: bool = False,  # NOTE: shadows the builtin `all` inside this function
        force: bool = False,
    ):
        """
        Delete deployments from a Kubernetes namespace
        Args:
            deployment_name: Name of the deployment to delete
            namespace: Kubernetes namespace containing the deployments
            all: Delete all deployments in the namespace
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)
        # Validate arguments: exactly one of DEPLOYMENT_NAME / --all must be given.
        if not deployment_name and not all:
            raise click.ClickException(
                "Either specify a deployment name or use --all to delete all deployments"
            )
        if deployment_name and all:
            raise click.ClickException(
                "Cannot specify both a deployment name and --all flag"
            )
        # Load Kubernetes configuration
        try:
            config.load_kube_config()
            api = client.CustomObjectsApi()
        except Exception as e:
            logger.error("Failed to load Kubernetes configuration", exc_info=True)
            raise click.ClickException(
                f"Failed to load Kubernetes configuration: {str(e)}"
            )
        # Define the group, version, and plural for the CRD
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"
        with Spinner(console=console) as spinner:
            try:
                # Get deployments to delete
                deployments_to_delete = []
                if all:
                    # Get all deployments in the namespace
                    spinner.update(
                        f"Fetching all deployments in namespace '{namespace}'"
                    )
                    deployments = api.list_namespaced_custom_object(
                        group=group,
                        version=version,
                        namespace=namespace,
                        plural=plural,
                    )
                    deployments_to_delete = [
                        item["metadata"]["name"]
                        for item in deployments.get("items", [])
                    ]
                    if not deployments_to_delete:
                        spinner.log(f"No deployments found in namespace '{namespace}'")
                        return
                else:
                    # Check if the specified deployment exists
                    try:
                        api.get_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=deployment_name,
                        )
                        deployments_to_delete = [deployment_name]
                    except client.rest.ApiException as e:
                        # 404 means the deployment does not exist; anything else is fatal.
                        if e.status == 404:
                            spinner.log(
                                f"[bold red]Deployment '{deployment_name}' not found in namespace '{namespace}'[/]"
                            )
                            return
                        raise
                # Confirm deletion if not forced
                if not force:
                    # Pause the spinner so the confirmation prompt renders cleanly.
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(deployments_to_delete)} deployments in namespace '{namespace}'?"
                    else:
                        message = f"Are you sure you want to delete deployment '{deployment_name}' in namespace '{namespace}'?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()
                # Delete deployments
                for name in deployments_to_delete:
                    spinner.update(
                        f"Deleting deployment '{name}' in namespace '{namespace}'"
                    )
                    try:
                        api.delete_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=name,
                        )
                        spinner.log(
                            f"[green]Successfully deleted deployment '{name}'[/]"
                        )
                    except client.rest.ApiException as e:
                        # A 404 here is benign (already gone); other failures are
                        # logged but do not abort the batch.
                        if e.status == 404:
                            spinner.log(
                                f"[yellow]Deployment '{name}' not found or already deleted[/]"
                            )
                        else:
                            spinner.log(
                                f"[bold red]Failed to delete deployment '{name}': {str(e)}[/]"
                            )
                            logger.error(
                                f"Failed to delete deployment '{name}'", exc_info=True
                            )
                # Final summary
                if all:
                    spinner.log(
                        f"[bold green]Deleted {len(deployments_to_delete)} deployments from namespace '{namespace}'[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {str(e)}[/]")
                raise SystemExit(1)

    return cli


# Module-level command object registered with the top-level CLI.
delete_command = build_delete_command()
......@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
@click.group(name="deploy")
def deploy_command_group():
"""Deploy 🍱 to a cluster"""
"""Deploy to a cluster"""
pass
......@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N
def build_deploy_command() -> click.Command:
from bentoml._internal.utils import add_experimental_docstring
@click.command(name="deploy")
@click.argument("bento", type=click.STRING, default=".")
@click.option("-n", "--name", type=click.STRING, help="Deployment name")
......@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command:
)
@click.option("--strategy", type=click.STRING, default="rolling-update")
@click.option("--version", type=click.STRING, help="Version tag for the Bento")
@add_experimental_docstring
def deploy_command(
bento: str | None,
name: str | None,
......@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command:
version: str | None = None,
):
"""
Deploy 🍱 to a cluster
Deploy a set of Dynamo services in a Bento to a K8s cluster
\b
BENTO is the serving target, it can be:
- a tag to a Bento in local Bento store
- a folder containing a valid 'bentofile.yaml'
- a path to a built Bento
"""
from bentoml._internal.log import configure_server_logging
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
import os
import typing as t
import bentoml
import click
from bentoml_cli.utils import BentoMLCommandGroup
from kubernetes import client, config
from rich import print as rich_print
from rich.table import Table
def build_list_command() -> click.Group:
    """Build the ``list`` CLI command group.

    Returns a click group with two subcommands:
      * ``bentos``       -- list bentos in the local BentoML store.
      * ``deployments``  -- list DynamoDeployment custom resources in a
                            Kubernetes namespace.
    """

    @click.group(name="list", cls=BentoMLCommandGroup)
    def cli():
        """List resources"""
        pass

    @cli.command(name="bentos")
    def list_bentos():
        """List all bentos in local store"""
        bentos = bentoml.list()
        table = Table(box=None, expand=True)
        table.add_column("Tag", overflow="fold")
        table.add_column("Service", overflow="fold")
        table.add_column("Created At", overflow="fold")
        for bento in bentos:
            table.add_row(
                str(bento.tag),
                bento.info.service,
                bento.info.creation_time.strftime("%Y-%m-%d %H:%M:%S"),
            )
        rich_print(table)

    @cli.command(name="deployments")
    @click.option(
        "--namespace",
        type=click.STRING,
        # Fix: previously no default, so omitting --namespace passed None to
        # the Kubernetes API (a required parameter) and the call failed.
        # Matches the default used by `delete deployments`.
        default="default",
        show_default=True,
        help="Kubernetes namespace",
    )
    @click.option("--cluster", type=click.STRING, help="Cluster name")
    @click.option(
        "-o",
        "--output",
        help="Display the output of this command.",
        type=click.Choice(["json", "table"]),
        default="table",
    )
    def list_deployments(
        namespace: str = "default",
        cluster: str | None = None,
        output: t.Literal["json", "table"] = "table",
    ):
        """List deployments"""
        # NOTE(review): `cluster` is accepted but not currently used to select
        # a kubeconfig context — confirm intended behavior.
        config.load_kube_config()
        api = client.CustomObjectsApi()
        # Define the group, version, and plural for the CRD
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"
        # Get the deployments from the Kubernetes API
        deployments = api.list_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
        )
        if output == "json":
            rich_print(json.dumps(deployments, indent=2))
            return
        # Create table for output
        table = Table(box=None, expand=True)
        table.add_column("Name", overflow="fold")
        table.add_column("Namespace", overflow="fold")
        table.add_column("Status", overflow="fold")
        table.add_column("Created At", overflow="fold")
        table.add_column("Replicas", overflow="fold")
        table.add_column("Resources", overflow="fold")
        table.add_column("URL", overflow="fold")
        ingress_suffix = os.getenv("DYNAMO_INGRESS_SUFFIX", "local")
        for item in deployments.get("items", []):
            metadata = item.get("metadata", {})
            spec = item.get("spec", {})
            # Only the "main" service of the deployment is summarized here.
            services = spec.get("services", {}).get("main", {}).get("spec", {})
            resources = services.get("resources", {})
            ingress = services.get("ingress", {})
            # Format resources as "requests / limits" per resource kind
            resources_str = (
                f"CPU: {resources.get('requests', {}).get('cpu', 'N/A')} / {resources.get('limits', {}).get('cpu', 'N/A')}\n"
                f"Memory: {resources.get('requests', {}).get('memory', 'N/A')} / {resources.get('limits', {}).get('memory', 'N/A')}\n"
                f"GPU: {resources.get('requests', {}).get('gpu', 'N/A')} / {resources.get('limits', {}).get('gpu', 'N/A')}"
            )
            # Format URL (only meaningful when ingress is enabled)
            url = (
                f"https://{ingress.get('hostPrefix', 'N/A')}.{ingress_suffix}"
                if ingress.get("enabled", False)
                else "N/A"
            )
            table.add_row(
                metadata.get("name", "N/A"),
                metadata.get("namespace", "N/A"),
                item.get("status", {}).get("state", "Unknown"),
                metadata.get("creationTimestamp", "N/A"),
                f"{services.get('autoscaling', {}).get('minReplicas', 'N/A')} - {services.get('autoscaling', {}).get('maxReplicas', 'N/A')}",
                resources_str,
                url,
            )
        rich_print(table)

    return cli


# Module-level command object registered with the top-level CLI.
list_command = build_list_command()
......@@ -23,9 +23,7 @@ import click
def build_run_command() -> click.Group:
from bentoml_cli.utils import BentoMLCommandGroup
@click.group(name="run", cls=BentoMLCommandGroup)
@click.group(name="run")
def cli():
pass
......
......@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
def build_serve_command() -> click.Group:
from bentoml._internal.log import configure_server_logging
from bentoml_cli.env_manager import env_manager
from bentoml_cli.utils import AliasCommand, BentoMLCommandGroup
from bentoml_cli.utils import AliasCommand
@click.group(name="serve", cls=BentoMLCommandGroup)
@click.group(name="serve")
def cli():
pass
......@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group:
timeout_graceful_shutdown: int | None,
**attrs: t.Any,
) -> None:
"""Start a HTTP BentoServer from a given 🍱
"""Locally run connected Dynamo services
\b
You can also pass service-specific configuration options using --ServiceName.param=value format.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import sys
import click
import rich
from bentoml._internal.cloud.client import RestApiClient
from bentoml._internal.cloud.config import (
DEFAULT_ENDPOINT,
CloudClientConfig,
CloudClientContext,
)
from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.utils.cattr import bentoml_cattr
from bentoml.exceptions import CLIException, CloudRESTApiClientError
# NOTE: exported as `cloud_command` but exposed to users as `dynamo server`.
@click.group(name="server")
def cloud_command():
    """Interact with your Dynamo Server"""
@cloud_command.command()
@click.option(
    "--endpoint",
    type=click.STRING,
    help="Dynamo Server endpoint",
    default=DEFAULT_ENDPOINT,
    envvar="DYNAMO_SERVER_API_ENDPOINT",
    show_default=True,
    show_envvar=True,
    required=True,
)
@click.option(
    "--api-token",
    type=click.STRING,
    help="Dynamo Server user API token",
    envvar="DYNAMO_SERVER_API_KEY",
    show_envvar=True,
    required=True,
)
def login(endpoint: str, api_token: str) -> None:  # type: ignore
    """Connect to your Dynamo Server. You can find deployment instructions for this in our docs

    Validates the token against the server, then persists the credentials as
    the current cloud context. Exits non-zero if validation fails.
    """
    try:
        cloud_rest_client = RestApiClient(endpoint, api_token)
        user = cloud_rest_client.v1.get_current_user()
        if user is None:
            raise CLIException("current user is not found")
        org = cloud_rest_client.v1.get_current_organization()
        if org is None:
            raise CLIException("current organization is not found")
        current_context_name = CloudClientConfig.get_config().current_context_name
        cloud_context = BentoMLContainer.cloud_context.get()
        # Prefer an explicitly configured context name; fall back to the
        # current context from the on-disk config.
        ctx = CloudClientContext(
            name=cloud_context if cloud_context is not None else current_context_name,
            endpoint=endpoint,
            api_token=api_token,
            email=user.email,
        )
        ctx.save()
        rich.print(
            f":white_check_mark: Configured BentoCloud credentials (current-context: {ctx.name})"
        )
        rich.print(
            f":white_check_mark: Logged in as [blue]{user.email}[/] at [blue]{org.name}[/] organization"
        )
    except CloudRESTApiClientError as e:
        if e.error_code == 401:
            rich.print(
                f":police_car_light: Error validating token: HTTP 401: Bad credentials ({endpoint}/api-token)",
                file=sys.stderr,
            )
        else:
            rich.print(
                f":police_car_light: Error validating token: HTTP {e.error_code}",
                file=sys.stderr,
            )
        # Fix: previously a failed login still exited with status 0; signal
        # the failure to callers/scripts (consistent with other commands).
        raise SystemExit(1)
@cloud_command.command()
def current_context() -> None:  # type: ignore
    """Get current cloud context."""
    # Fetch the active context from the on-disk config and dump it as JSON.
    active = CloudClientConfig.get_config().get_context()
    rich.print_json(data=bentoml_cattr.unstructure(active))
@cloud_command.command()
def list_context() -> None:  # type: ignore
    """List all available context."""
    # Print only the context names, not full credential records.
    names = [ctx.name for ctx in CloudClientConfig.get_config().contexts]
    rich.print_json(data=bentoml_cattr.unstructure(names))
@cloud_command.command()
@click.argument("context_name", type=click.STRING)
def update_current_context(context_name: str) -> None:  # type: ignore
    """Update current context"""
    # Persist the switch, then report the context that is now active.
    cfg = CloudClientConfig.get_config()
    ctx = cfg.set_current_context(context_name)
    rich.print(f"Successfully switched to context: {ctx.name}")
......@@ -484,7 +484,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component()
)
else 'Starting production %s BentoServer from "%s" (Press CTRL+C to quit)'
else "Starting %s (Press CTRL+C to quit)"
),
*(
(svc.name, *svc.dynamo_address(), scheme, log_host, port)
......@@ -492,7 +492,7 @@ def serve_http(
hasattr(svc, "is_dynamo_component")
and svc.is_dynamo_component()
)
else (scheme.upper(), bento_identifier)
else (bento_identifier,)
),
),
)
......
......@@ -19,24 +19,32 @@ import json
import logging
import os
import sys
import typing as t
from typing import Optional
from urllib.parse import urlparse
import click
import rich
import yaml
from dynamo.sdk.cli.serve import _parse_service_args
logger = logging.getLogger(__name__)
def build_start_command() -> click.Group:
from bentoml._internal.utils import add_experimental_docstring
from bentoml_cli.utils import BentoMLCommandGroup
@click.group(name="start", cls=BentoMLCommandGroup)
@click.group(name="start")
def cli():
pass
@cli.command()
@cli.command(
context_settings=dict(
ignore_unknown_options=True,
allow_extra_args=True,
),
)
@click.argument("bento", type=click.STRING, default=".")
@click.option(
"--service-name",
......@@ -46,6 +54,12 @@ def build_start_command() -> click.Group:
envvar="BENTOML_SERVE_SERVICE_NAME",
help="specify the runner name to serve",
)
@click.option(
"-f",
"--file",
type=click.Path(exists=True),
help="Path to YAML config file for service configuration",
)
@click.option(
"--depends",
type=click.STRING,
......@@ -137,15 +151,24 @@ def build_start_command() -> click.Group:
help="Reload Service when code changes detected",
default=False,
)
@click.option(
"--dry-run",
is_flag=True,
help="Print the final service configuration and exit without starting the server",
default=False,
)
@add_experimental_docstring
def start(
ctx: click.Context,
bento: str,
service_name: str,
dry_run: bool,
depends: Optional[list[str]],
runner_map: Optional[str],
bind: Optional[str],
port: Optional[int],
host: Optional[str],
file: str | None,
backlog: Optional[int],
working_dir: Optional[str],
api_workers: Optional[int],
......@@ -162,11 +185,44 @@ def build_start_command() -> click.Group:
reload: bool = False,
) -> None:
"""
Start a HTTP API server standalone. This will be used inside Yatai.
Start a single Dynamo service. This will be used inside Yatai.
"""
from bentoml import Service
from bentoml._internal.service.loader import load
service_configs: dict[str, dict[str, t.Any]] = {}
# Load file if provided
if file:
with open(file) as f:
yaml_configs = yaml.safe_load(f)
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
# Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items():
for key, value in configs.items():
if service not in service_configs:
service_configs[service] = {}
service_configs[service][key] = value
if dry_run:
rich.print("[bold]Service Configuration:[/bold]")
rich.print(json.dumps(service_configs, indent=2))
rich.print("\n[bold]Environment Variable that would be set:[/bold]")
rich.print(f"DYNAMO_SERVICE_CONFIG={json.dumps(service_configs)}")
sys.exit(0)
# Set environment variable with service configuration
if service_configs:
os.environ["DYNAMO_SERVICE_CONFIG"] = json.dumps(service_configs)
if working_dir is None:
if os.path.isdir(os.path.expanduser(bento)):
working_dir = os.path.expanduser(bento)
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing as t
import click
from click import Command, Context
class DynamoCommandGroup(click.Group):
    """Simplified version of BentoMLCommandGroup for Dynamo CLI.

    Adds alias support on top of ``click.Group``: commands may declare an
    ``aliases`` attribute, and ``get_command`` resolves those aliases back to
    the canonical command name.
    """

    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
        # Aliases for this group itself; popped before click sees the kwargs.
        self.aliases = kwargs.pop("aliases", [])
        super().__init__(*args, **kwargs)
        # canonical command name -> list of its aliases
        self._commands: dict[str, list[str]] = {}
        # alias -> canonical command name
        self._aliases: dict[str, str] = {}

    def add_command(self, cmd: Command, name: str | None = None) -> None:
        """Register *cmd*, recording any aliases it declares.

        Fix: dropped the vestigial ``assert cmd.callback is not None`` (and the
        no-op callback reassignment) left over from BentoMLCommandGroup's
        wrapper logic; it rejected callback-less commands such as sub-groups.
        """
        cmd.context_settings["max_content_width"] = 120
        aliases = getattr(cmd, "aliases", None)
        if aliases:
            assert cmd.name
            self._commands[cmd.name] = aliases
            self._aliases.update({alias: cmd.name for alias in aliases})
        return super().add_command(cmd, name)

    def add_subcommands(self, group: click.Group) -> None:
        """Merge every command of *group* into this group."""
        # NOTE: click.MultiCommand is deprecated in click >= 8.1 (alias of
        # click.Group); kept for compatibility with the pinned click version.
        if not isinstance(group, click.MultiCommand):
            raise TypeError(
                "DynamoCommandGroup.add_subcommands only accepts click.MultiCommand"
            )
        if isinstance(group, DynamoCommandGroup):
            # Common wrappers are already applied, call the super() method
            for name, cmd in group.commands.items():
                super().add_command(cmd, name)
            self._commands.update(group._commands)
            self._aliases.update(group._aliases)
        else:
            for name, cmd in group.commands.items():
                self.add_command(cmd, name)

    def resolve_alias(self, cmd_name: str) -> str:
        """Return the canonical name for *cmd_name* (identity if not an alias)."""
        return self._aliases.get(cmd_name, cmd_name)

    def get_command(self, ctx: Context, cmd_name: str) -> Command | None:
        # Resolve aliases before delegating the lookup to click.
        cmd_name = self.resolve_alias(cmd_name)
        return super().get_command(ctx, cmd_name)

    def add_single_command(self, group: click.Group, command_name: str) -> None:
        """Add a single command from a group by name.

        Raises:
            TypeError: if *group* is not a multi-command.
            ValueError: if *command_name* is not found in *group*.
        """
        if not isinstance(group, click.MultiCommand):
            raise TypeError("Only accepts click.MultiCommand")
        ctx = click.Context(group)
        cmd = group.get_command(ctx, command_name)
        if cmd is None:
            raise ValueError(f"Command '{command_name}' not found in group")
        self.add_command(cmd, command_name)
......@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]):
...
await dep.get_endpoint("generate") # equivalent to the following
router_client = (
await runtime.namespace("dynamo-init")
await runtime.namespace("dynamo")
.component("router")
.endpoint("generate")
.client()
......
......@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar
## Deployment Architectures
### Monolith
### Aggregated
Single-instance deployment where both prefill and decode are done by the same worker.
### Disaggregated
......@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy:
### Example architectures
#### Router based worker
#### Aggregated serving
```bash
cd /workspace/deploy/examples/llm
dynamo serve monolith.router_based_deployment:Frontend -f ./configs/monolith/router_based_deployment.yaml
dynamo serve graphs.agg:Frontend -f ./configs/agg.yaml
```
#### Routerless monolith
#### Aggregated serving with KV Routing
```bash
cd /workspace/deploy/examples/llm
dynamo serve monolith.routerless_deployment:Frontend -f ./configs/monolith/routerless_deployment.yaml
dynamo serve graphs.agg_router:Frontend -f ./configs/agg_router.yaml
```
#### Routerless processor based monolith
```bash
dynamo serve monolith.routerless_processor_deployment:Frontend -f ./configs/monolith/routerless_processor_deployment.yaml
```
#### Router based disaggregated serving
#### Disaggregated serving
```bash
cd /workspace/deploy/examples/llm
dynamo serve disaggregated.router_based_deployment:Frontend -f ./configs/disaggregated/router_based_deployment.yaml
dynamo serve graphs.disagg:Frontend -f ./configs/disagg.yaml
```
#### Routerless disaggregated serving
#### Disaggregated serving with KV Routing
```bash
cd /workspace/deploy/examples/llm
dynamo serve disaggregated.routerless_deployment:Frontend -f ./configs/disaggregated/routerless_deployment.yaml
dynamo serve graphs.disagg_router:Frontend -f ./configs/disagg_router.yaml
```
### Client
......
......@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace:
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class Router:
async def async_init(self):
self.runtime = dynamo_context["runtime"]
self.workers_client = (
await self.runtime.namespace("dynamo-init")
await self.runtime.namespace("dynamo")
.component("VllmWorker")
.endpoint("generate")
.client()
......@@ -101,7 +101,7 @@ class Router:
)
await asyncio.sleep(2)
kv_listener = self.runtime.namespace("dynamo-init").component("VllmWorker")
kv_listener = self.runtime.namespace("dynamo").component("VllmWorker")
await kv_listener.create_service()
self.indexer = KvIndexer(kv_listener, self.args.block_size)
self.metrics_aggregator = KvMetricsAggregator(kv_listener)
......
......@@ -44,7 +44,7 @@ class RequestType(BaseModel):
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -89,7 +89,7 @@ class PrefillWorker:
raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime)
self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata)
task = asyncio.create_task(self.prefill_queue_handler())
task.add_done_callback(lambda _: print("prefill queue handler created"))
......
......@@ -41,7 +41,7 @@ class RequestType(Enum):
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......
......@@ -37,7 +37,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess:
raise RuntimeError("Failed to initialize engine client")
runtime = dynamo_context["runtime"]
metadata = self.engine_client.nixl_metadata
self._metadata_store = NixlMetadataStore("dynamo-init", runtime)
self._metadata_store = NixlMetadataStore("dynamo", runtime)
await self._metadata_store.put(metadata.engine_id, metadata)
@dynamo_endpoint()
......
......@@ -41,7 +41,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class VllmWorkerRouterLess:
runtime = dynamo_context["runtime"]
if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime)
metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata)
models = OpenAIServingModels(
......
......@@ -44,7 +44,7 @@ from dynamo.sdk import (
@service(
dynamo={
"enabled": True,
"namespace": "dynamo-init",
"namespace": "dynamo",
},
resources={"gpu": 1, "cpu": "10", "memory": "20Gi"},
workers=1,
......@@ -87,7 +87,7 @@ class VllmWorker:
if self.engine_args.router == "kv":
VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id()
os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID)
os.environ["VLLM_KV_NAMESPACE"] = "dynamo-init"
os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
os.environ["VLLM_KV_COMPONENT"] = class_name
vllm_logger.info(f"Generate endpoint ID: {VLLM_WORKER_ID}")
# note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based
......@@ -131,7 +131,7 @@ class VllmWorker:
if self.engine_args.remote_prefill:
metadata = self.engine_client.nixl_metadata
metadata_store = NixlMetadataStore("dynamo-init", runtime)
metadata_store = NixlMetadataStore("dynamo", runtime)
await metadata_store.put(metadata.engine_id, metadata)
if self.engine_args.conditional_disagg:
......
......@@ -15,14 +15,14 @@
Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions
endpoint: dynamo.Processor.chat/completions
port: 8000
Processor:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size: 64
max-model-len: 16384
router: random
router: round-robin
VllmWorker:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......
......@@ -15,7 +15,7 @@
Frontend:
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint: dynamo-init.Processor.chat/completions
endpoint: dynamo.Processor.chat/completions
port: 8000
Processor:
......@@ -39,6 +39,4 @@ VllmWorker:
router: kv
tensor-parallel-size: 1
ServiceArgs:
workers: 2
envs:
- CUDA_VISIBLE_DEVICES: '0,1'
workers: 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment