Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
df51a622
Commit
df51a622
authored
Mar 16, 2025
by
ishandhanani
Committed by
GitHub
Mar 17, 2025
Browse files
chore: refactor examples and clean CLI (#195)
parent
0517f757
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
307 additions
and
500 deletions
+307
-500
deploy/dynamo/sdk/src/dynamo/sdk/cli/cli.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/cli.py
+13
-18
deploy/dynamo/sdk/src/dynamo/sdk/cli/delete.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/delete.py
+0
-295
deploy/dynamo/sdk/src/dynamo/sdk/cli/deploy.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/deploy.py
+5
-3
deploy/dynamo/sdk/src/dynamo/sdk/cli/list.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/list.py
+0
-137
deploy/dynamo/sdk/src/dynamo/sdk/cli/run.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/run.py
+1
-3
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
+3
-3
deploy/dynamo/sdk/src/dynamo/sdk/cli/server.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/server.py
+121
-0
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
+2
-2
deploy/dynamo/sdk/src/dynamo/sdk/cli/start.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/start.py
+60
-4
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
+75
-0
deploy/dynamo/sdk/src/dynamo/sdk/lib/dependency.py
deploy/dynamo/sdk/src/dynamo/sdk/lib/dependency.py
+1
-1
deploy/examples/llm/README.md
deploy/examples/llm/README.md
+9
-15
deploy/examples/llm/components/kv_router.py
deploy/examples/llm/components/kv_router.py
+3
-3
deploy/examples/llm/components/prefill_worker.py
deploy/examples/llm/components/prefill_worker.py
+2
-2
deploy/examples/llm/components/processor.py
deploy/examples/llm/components/processor.py
+1
-1
deploy/examples/llm/components/routerless/prefill_worker.py
deploy/examples/llm/components/routerless/prefill_worker.py
+2
-2
deploy/examples/llm/components/routerless/worker.py
deploy/examples/llm/components/routerless/worker.py
+2
-2
deploy/examples/llm/components/worker.py
deploy/examples/llm/components/worker.py
+3
-3
deploy/examples/llm/configs/agg.yaml
deploy/examples/llm/configs/agg.yaml
+2
-2
deploy/examples/llm/configs/agg_router.yaml
deploy/examples/llm/configs/agg_router.yaml
+2
-4
No files found.
deploy/dynamo/sdk/src/dynamo/sdk/cli/cli.py
View file @
df51a622
...
@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command:
...
@@ -23,45 +23,40 @@ def create_bentoml_cli() -> click.Command:
from
bentoml._internal.configuration
import
BENTOML_VERSION
from
bentoml._internal.configuration
import
BENTOML_VERSION
from
bentoml._internal.context
import
server_context
from
bentoml._internal.context
import
server_context
from
bentoml_cli.bentos
import
bento_command
from
bentoml_cli.bentos
import
bento_command
from
bentoml_cli.cloud
import
cloud_command
from
bentoml_cli.containerize
import
containerize_command
from
bentoml_cli.containerize
import
containerize_command
from
bentoml_cli.deployment
import
deployment_command
,
develop_command
from
bentoml_cli.utils
import
get_entry_points
from
bentoml_cli.env
import
env_command
from
bentoml_cli.models
import
model_command
from
bentoml_cli.secret
import
secret_command
from
bentoml_cli.utils
import
BentoMLCommandGroup
,
get_entry_points
from
dynamo.sdk.cli.delete
import
delete_command
from
dynamo.sdk.cli.deploy
import
deploy_command
from
dynamo.sdk.cli.deploy
import
deploy_command
from
dynamo.sdk.cli.list
import
list_command
from
dynamo.sdk.cli.run
import
run_command
from
dynamo.sdk.cli.run
import
run_command
from
dynamo.sdk.cli.serve
import
serve_command
from
dynamo.sdk.cli.serve
import
serve_command
from
dynamo.sdk.cli.server
import
cloud_command
from
dynamo.sdk.cli.start
import
start_command
from
dynamo.sdk.cli.start
import
start_command
from
dynamo.sdk.cli.utils
import
DynamoCommandGroup
server_context
.
service_type
=
"cli"
server_context
.
service_type
=
"cli"
CONTEXT_SETTINGS
=
{
"help_option_names"
:
(
"-h"
,
"--help"
)}
CONTEXT_SETTINGS
=
{
"help_option_names"
:
(
"-h"
,
"--help"
)}
@
click
.
group
(
cls
=
BentoML
CommandGroup
,
context_settings
=
CONTEXT_SETTINGS
)
@
click
.
group
(
cls
=
Dynamo
CommandGroup
,
context_settings
=
CONTEXT_SETTINGS
)
@
click
.
version_option
(
BENTOML_VERSION
,
"-v"
,
"--version"
)
@
click
.
version_option
(
BENTOML_VERSION
,
"-v"
,
"--version"
)
def
bentoml_cli
():
# TODO: to be renamed to something....
def
bentoml_cli
():
# TODO: to be renamed to something....
""" """
"""
The Dynamo CLI is a CLI for serving, containerizing, and deploying Dynamo applications.
It takes inspiration from and leverages core pieces of the BentoML deployment stack.
At a high level, you use `serve` to run a set of dynamo services locally,
`build` and `containerize` to package them up for deployment, and then `server`
and `deploy` to deploy them to a K8s cluster running the Dynamo Server
"""
# Add top-level CLI commands
# Add top-level CLI commands
bentoml_cli
.
add_command
(
env_command
)
bentoml_cli
.
add_command
(
cloud_command
)
bentoml_cli
.
add_command
(
cloud_command
)
bentoml_cli
.
add_command
(
model_command
)
bentoml_cli
.
add_single_command
(
bento_command
,
"build"
)
bentoml_cli
.
add_subcommands
(
bento_command
)
bentoml_cli
.
add_subcommands
(
start_command
)
bentoml_cli
.
add_subcommands
(
start_command
)
bentoml_cli
.
add_subcommands
(
serve_command
)
bentoml_cli
.
add_subcommands
(
serve_command
)
bentoml_cli
.
add_subcommands
(
run_command
)
bentoml_cli
.
add_subcommands
(
run_command
)
bentoml_cli
.
add_command
(
containerize_command
)
bentoml_cli
.
add_command
(
containerize_command
)
bentoml_cli
.
add_command
(
deploy_command
)
bentoml_cli
.
add_command
(
deploy_command
)
bentoml_cli
.
add_command
(
develop_command
)
bentoml_cli
.
add_command
(
deployment_command
)
bentoml_cli
.
add_command
(
secret_command
)
bentoml_cli
.
add_command
(
list_command
)
bentoml_cli
.
add_command
(
delete_command
)
# Load commands from extensions
# Load commands from extensions
for
ep
in
get_entry_points
(
"bentoml.commands"
):
for
ep
in
get_entry_points
(
"bentoml.commands"
):
bentoml_cli
.
add_command
(
ep
.
load
())
bentoml_cli
.
add_command
(
ep
.
load
())
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/delete.py
deleted
100644 → 0
View file @
0517f757
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
annotations
import
logging
import
bentoml
import
click
from
bentoml._internal.cloud.base
import
Spinner
from
bentoml_cli.utils
import
BentoMLCommandGroup
from
kubernetes
import
client
,
config
from
rich.console
import
Console
logger
=
logging
.
getLogger
(
__name__
)
def build_delete_command() -> click.Group:
    """Build the ``delete`` command group.

    The returned group has two subcommands:

    * ``bentos`` removes one bento, or every bento, from the local store.
    * ``deployments`` removes one ``dynamodeployments`` custom object, or
      every one, from a Kubernetes namespace.

    Returns:
        The configured ``click`` group.
    """

    @click.group(name="delete", cls=BentoMLCommandGroup)
    def cli():
        """Delete resources"""
        pass

    @cli.command(name="bentos")
    @click.argument("bento_tag", type=click.STRING, required=False)
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all bentos in local store",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_bentos(
        bento_tag: str | None = None,
        all: bool = False,
        force: bool = False,
    ):
        """
        Delete bentos from local store

        Args:
            bento_tag: Tag of the bento to delete
            all: Delete all bentos
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)

        # Exactly one of BENTO_TAG / --all must be given.
        if not bento_tag and not all:
            raise click.ClickException(
                "Either specify a bento tag or use --all to delete all bentos"
            )
        if bento_tag and all:
            raise click.ClickException("Cannot specify both a bento tag and --all flag")

        with Spinner(console=console) as spinner:
            try:
                if all:
                    spinner.update("Fetching all bentos")
                    bentos_to_delete = [str(bento.tag) for bento in bentoml.list()]
                    if not bentos_to_delete:
                        spinner.log("No bentos found in local store")
                        return
                else:
                    # The validation above guarantees a non-empty tag here.
                    bentos_to_delete = [bento_tag]

                # Confirm deletion if not forced. The spinner is paused so it
                # does not draw over the interactive prompt.
                if not force:
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(bentos_to_delete)} bentos from local store?"
                    else:
                        message = f"Are you sure you want to delete bento '{bento_tag}' from local store?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()

                # Delete one by one; a failure on one bento does not stop the
                # rest. Only successful deletions are counted.
                deleted_count = 0
                for tag in bentos_to_delete:
                    spinner.update(f"Deleting bento '{tag}'")
                    try:
                        bentoml.delete(tag)
                        deleted_count += 1
                        spinner.log(f"[green]Successfully deleted bento '{tag}'[/]")
                    except Exception as e:
                        spinner.log(
                            f"[bold red]Failed to delete bento '{tag}': {e}[/]"
                        )
                        logger.error(f"Failed to delete bento '{tag}'", exc_info=True)

                # Final summary: report what was actually deleted, not what
                # was attempted.
                if all:
                    spinner.log(
                        f"[bold green]Deleted {deleted_count} bentos from local store[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {e}[/]")
                raise SystemExit(1)

    @cli.command(name="deployments")
    @click.argument("deployment_name", type=click.STRING, required=False)
    @click.option(
        "--namespace",
        type=click.STRING,
        default="default",
        help="Kubernetes namespace containing the deployments",
    )
    @click.option(
        "--all",
        is_flag=True,
        default=False,
        help="Delete all deployments in the namespace",
    )
    @click.option(
        "--force",
        is_flag=True,
        default=False,
        help="Skip confirmation prompt",
    )
    def delete_deployments(
        deployment_name: str | None = None,
        namespace: str = "default",
        all: bool = False,
        force: bool = False,
    ):
        """
        Delete deployments from a Kubernetes namespace

        Args:
            deployment_name: Name of the deployment to delete
            namespace: Kubernetes namespace containing the deployments
            all: Delete all deployments in the namespace
            force: Skip confirmation prompt
        """
        console = Console(highlight=False)

        # Exactly one of DEPLOYMENT_NAME / --all must be given.
        if not deployment_name and not all:
            raise click.ClickException(
                "Either specify a deployment name or use --all to delete all deployments"
            )
        if deployment_name and all:
            raise click.ClickException(
                "Cannot specify both a deployment name and --all flag"
            )

        # Load Kubernetes configuration
        try:
            config.load_kube_config()
            api = client.CustomObjectsApi()
        except Exception as e:
            logger.error("Failed to load Kubernetes configuration", exc_info=True)
            raise click.ClickException(
                f"Failed to load Kubernetes configuration: {e}"
            ) from e

        # Group, version and plural identifying the custom resource.
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"

        with Spinner(console=console) as spinner:
            try:
                if all:
                    spinner.update(
                        f"Fetching all deployments in namespace '{namespace}'"
                    )
                    deployments = api.list_namespaced_custom_object(
                        group=group,
                        version=version,
                        namespace=namespace,
                        plural=plural,
                    )
                    deployments_to_delete = [
                        item["metadata"]["name"]
                        for item in deployments.get("items", [])
                    ]
                    if not deployments_to_delete:
                        spinner.log(f"No deployments found in namespace '{namespace}'")
                        return
                else:
                    # Check that the named deployment exists before prompting.
                    try:
                        api.get_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=deployment_name,
                        )
                    except client.rest.ApiException as e:
                        if e.status == 404:
                            spinner.log(
                                f"[bold red]Deployment '{deployment_name}' not found in namespace '{namespace}'[/]"
                            )
                            return
                        raise
                    deployments_to_delete = [deployment_name]

                # Confirm deletion if not forced. The spinner is paused so it
                # does not draw over the interactive prompt.
                if not force:
                    spinner.stop()
                    if all:
                        message = f"Are you sure you want to delete all {len(deployments_to_delete)} deployments in namespace '{namespace}'?"
                    else:
                        message = f"Are you sure you want to delete deployment '{deployment_name}' in namespace '{namespace}'?"
                    if not click.confirm(message):
                        console.print("[yellow]Deletion cancelled[/]")
                        return
                    spinner.start()

                # Delete one by one; a failure on one deployment does not stop
                # the rest. Only successful deletions are counted.
                deleted_count = 0
                for name in deployments_to_delete:
                    spinner.update(
                        f"Deleting deployment '{name}' in namespace '{namespace}'"
                    )
                    try:
                        api.delete_namespaced_custom_object(
                            group=group,
                            version=version,
                            namespace=namespace,
                            plural=plural,
                            name=name,
                        )
                        deleted_count += 1
                        spinner.log(
                            f"[green]Successfully deleted deployment '{name}'[/]"
                        )
                    except client.rest.ApiException as e:
                        if e.status == 404:
                            spinner.log(
                                f"[yellow]Deployment '{name}' not found or already deleted[/]"
                            )
                        else:
                            spinner.log(
                                f"[bold red]Failed to delete deployment '{name}': {e}[/]"
                            )
                            logger.error(
                                f"Failed to delete deployment '{name}'", exc_info=True
                            )

                # Final summary: report what was actually deleted, not what
                # was attempted.
                if all:
                    spinner.log(
                        f"[bold green]Deleted {deleted_count} deployments from namespace '{namespace}'[/]"
                    )
            except Exception as e:
                logger.error("Deletion operation failed", exc_info=True)
                spinner.log(f"[bold red]Operation failed: {e}[/]")
                raise SystemExit(1)

    return cli
delete_command
=
build_delete_command
()
deploy/dynamo/sdk/src/dynamo/sdk/cli/deploy.py
View file @
df51a622
...
@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
...
@@ -36,7 +36,7 @@ logger = logging.getLogger(__name__)
@
click
.
group
(
name
=
"deploy"
)
@
click
.
group
(
name
=
"deploy"
)
def
deploy_command_group
():
def
deploy_command_group
():
"""Deploy
🍱
to a cluster"""
"""Deploy to a cluster"""
pass
pass
...
@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N
...
@@ -58,6 +58,8 @@ def convert_env_to_dict(env: tuple[str, ...] | None) -> list[dict[str, str]] | N
def
build_deploy_command
()
->
click
.
Command
:
def
build_deploy_command
()
->
click
.
Command
:
from
bentoml._internal.utils
import
add_experimental_docstring
@
click
.
command
(
name
=
"deploy"
)
@
click
.
command
(
name
=
"deploy"
)
@
click
.
argument
(
"bento"
,
type
=
click
.
STRING
,
default
=
"."
)
@
click
.
argument
(
"bento"
,
type
=
click
.
STRING
,
default
=
"."
)
@
click
.
option
(
"-n"
,
"--name"
,
type
=
click
.
STRING
,
help
=
"Deployment name"
)
@
click
.
option
(
"-n"
,
"--name"
,
type
=
click
.
STRING
,
help
=
"Deployment name"
)
...
@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command:
...
@@ -117,6 +119,7 @@ def build_deploy_command() -> click.Command:
)
)
@
click
.
option
(
"--strategy"
,
type
=
click
.
STRING
,
default
=
"rolling-update"
)
@
click
.
option
(
"--strategy"
,
type
=
click
.
STRING
,
default
=
"rolling-update"
)
@
click
.
option
(
"--version"
,
type
=
click
.
STRING
,
help
=
"Version tag for the Bento"
)
@
click
.
option
(
"--version"
,
type
=
click
.
STRING
,
help
=
"Version tag for the Bento"
)
@
add_experimental_docstring
def
deploy_command
(
def
deploy_command
(
bento
:
str
|
None
,
bento
:
str
|
None
,
name
:
str
|
None
,
name
:
str
|
None
,
...
@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command:
...
@@ -136,12 +139,11 @@ def build_deploy_command() -> click.Command:
version
:
str
|
None
=
None
,
version
:
str
|
None
=
None
,
):
):
"""
"""
Deploy
🍱 to a
cluster
Deploy
a set of Dynamo services in a Bento to a K8s
cluster
\b
\b
BENTO is the serving target, it can be:
BENTO is the serving target, it can be:
- a tag to a Bento in local Bento store
- a tag to a Bento in local Bento store
- a folder containing a valid 'bentofile.yaml'
- a path to a built Bento
- a path to a built Bento
"""
"""
from
bentoml._internal.log
import
configure_server_logging
from
bentoml._internal.log
import
configure_server_logging
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/list.py
deleted
100644 → 0
View file @
0517f757
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
annotations
import
json
import
os
import
typing
as
t
import
bentoml
import
click
from
bentoml_cli.utils
import
BentoMLCommandGroup
from
kubernetes
import
client
,
config
from
rich
import
print
as
rich_print
from
rich.table
import
Table
def build_list_command() -> click.Group:
    """Build the ``list`` command group.

    The returned group has two subcommands:

    * ``bentos`` prints a table of the bentos in the local store.
    * ``deployments`` prints the ``dynamodeployments`` custom objects of a
      Kubernetes namespace, as a table or as JSON.

    Returns:
        The configured ``click`` group.
    """

    @click.group(name="list", cls=BentoMLCommandGroup)
    def cli():
        """List resources"""
        pass

    @cli.command(name="bentos")
    def list_bentos():
        """List all bentos in local store"""
        bentos = bentoml.list()

        table = Table(box=None, expand=True)
        for heading in ("Tag", "Service", "Created At"):
            table.add_column(heading, overflow="fold")

        for bento in bentos:
            table.add_row(
                str(bento.tag),
                bento.info.service,
                bento.info.creation_time.strftime("%Y-%m-%d %H:%M:%S"),
            )
        rich_print(table)

    @cli.command(name="deployments")
    @click.option("--namespace", type=click.STRING, help="Kubernetes namespace")
    @click.option("--cluster", type=click.STRING, help="Cluster name")
    @click.option(
        "-o",
        "--output",
        help="Display the output of this command.",
        type=click.Choice(["json", "table"]),
        default="table",
    )
    def list_deployments(
        namespace: str | None = None,
        cluster: str | None = None,
        output: t.Literal["json", "table"] = "table",
    ):
        """List deployments"""
        config.load_kube_config()
        api = client.CustomObjectsApi()

        # Group, version and plural identifying the custom resource.
        group = "nvidia.com"
        version = "v1alpha1"
        plural = "dynamodeployments"

        # --namespace has no default, but the namespaced list call rejects a
        # missing namespace. Fall back to "default", the same namespace the
        # delete command uses when none is given.
        deployments = api.list_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace or "default",
            plural=plural,
        )

        if output == "json":
            rich_print(json.dumps(deployments, indent=2))
            return

        # Create table for output
        table = Table(box=None, expand=True)
        for heading in (
            "Name",
            "Namespace",
            "Status",
            "Created At",
            "Replicas",
            "Resources",
            "URL",
        ):
            table.add_column(heading, overflow="fold")

        ingress_suffix = os.getenv("DYNAMO_INGRESS_SUFFIX", "local")

        for item in deployments.get("items", []):
            metadata = item.get("metadata", {})
            spec = item.get("spec", {})
            # Only the "main" service of each deployment is summarised.
            services = spec.get("services", {}).get("main", {}).get("spec", {})
            resources = services.get("resources", {})
            ingress = services.get("ingress", {})
            requests = resources.get("requests", {})
            limits = resources.get("limits", {})
            autoscaling = services.get("autoscaling", {})

            # Format resources as "request/limit", one line per resource kind.
            resources_str = (
                f"CPU: {requests.get('cpu', 'N/A')}/{limits.get('cpu', 'N/A')}\n"
                f"Memory: {requests.get('memory', 'N/A')}/{limits.get('memory', 'N/A')}\n"
                f"GPU: {requests.get('gpu', 'N/A')}/{limits.get('gpu', 'N/A')}"
            )

            # Format URL; it is only shown when ingress is enabled.
            url = (
                f"https://{ingress.get('hostPrefix', 'N/A')}.{ingress_suffix}"
                if ingress.get("enabled", False)
                else "N/A"
            )

            table.add_row(
                metadata.get("name", "N/A"),
                metadata.get("namespace", "N/A"),
                item.get("status", {}).get("state", "Unknown"),
                metadata.get("creationTimestamp", "N/A"),
                f"{autoscaling.get('minReplicas', 'N/A')}-{autoscaling.get('maxReplicas', 'N/A')}",
                resources_str,
                url,
            )

        rich_print(table)

    return cli
list_command
=
build_list_command
()
deploy/dynamo/sdk/src/dynamo/sdk/cli/run.py
View file @
df51a622
...
@@ -23,9 +23,7 @@ import click
...
@@ -23,9 +23,7 @@ import click
def
build_run_command
()
->
click
.
Group
:
def
build_run_command
()
->
click
.
Group
:
from
bentoml_cli.utils
import
BentoMLCommandGroup
@
click
.
group
(
name
=
"run"
)
@
click
.
group
(
name
=
"run"
,
cls
=
BentoMLCommandGroup
)
def
cli
():
def
cli
():
pass
pass
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
View file @
df51a622
...
@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
...
@@ -150,9 +150,9 @@ def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
def
build_serve_command
()
->
click
.
Group
:
def
build_serve_command
()
->
click
.
Group
:
from
bentoml._internal.log
import
configure_server_logging
from
bentoml._internal.log
import
configure_server_logging
from
bentoml_cli.env_manager
import
env_manager
from
bentoml_cli.env_manager
import
env_manager
from
bentoml_cli.utils
import
AliasCommand
,
BentoMLCommandGroup
from
bentoml_cli.utils
import
AliasCommand
@
click
.
group
(
name
=
"serve"
,
cls
=
BentoMLCommandGroup
)
@
click
.
group
(
name
=
"serve"
)
def
cli
():
def
cli
():
pass
pass
...
@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group:
...
@@ -335,7 +335,7 @@ def build_serve_command() -> click.Group:
timeout_graceful_shutdown
:
int
|
None
,
timeout_graceful_shutdown
:
int
|
None
,
**
attrs
:
t
.
Any
,
**
attrs
:
t
.
Any
,
)
->
None
:
)
->
None
:
"""
Start a HTTP BentoServer from a given 🍱
"""
Locally run connected Dynamo services
\b
\b
You can also pass service-specific configuration options using --ServiceName.param=value format.
You can also pass service-specific configuration options using --ServiceName.param=value format.
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/server.py
0 → 100644
View file @
df51a622
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
annotations
import
sys
import
click
import
rich
from
bentoml._internal.cloud.client
import
RestApiClient
from
bentoml._internal.cloud.config
import
(
DEFAULT_ENDPOINT
,
CloudClientConfig
,
CloudClientContext
,
)
from
bentoml._internal.configuration.containers
import
BentoMLContainer
from
bentoml._internal.utils.cattr
import
bentoml_cattr
from
bentoml.exceptions
import
CLIException
,
CloudRESTApiClientError
# Root click group exposed on the command line as ``server``. The commands
# defined below in this module register themselves on it through
# ``@cloud_command.command()``. The docstring is the group's help text.
@click.group(name="server")
def cloud_command():
    """Interact with your Dynamo Server"""
# ``login`` checks the endpoint/token pair against the server and, when it is
# accepted, saves it as a cloud client context. The docstring below is the
# command's help text and is therefore kept verbatim.
#
# Raises:
#     CLIException: the server returned no current user or organization.
#     SystemExit: exit status 1 when the server rejects the request, so that
#         scripts can detect a failed login.
@cloud_command.command()
@click.option(
    "--endpoint",
    type=click.STRING,
    help="Dynamo Server endpoint",
    default=DEFAULT_ENDPOINT,
    envvar="DYNAMO_SERVER_API_ENDPOINT",
    show_default=True,
    show_envvar=True,
    required=True,
)
@click.option(
    "--api-token",
    type=click.STRING,
    help="Dynamo Server user API token",
    envvar="DYNAMO_SERVER_API_KEY",
    show_envvar=True,
    required=True,
)
def login(endpoint: str, api_token: str) -> None:  # type: ignore
    """Connect to your Dynamo Server. You can find deployment instructions for this in our docs"""
    try:
        cloud_rest_client = RestApiClient(endpoint, api_token)

        # Both lookups double as a validity check for the supplied token.
        user = cloud_rest_client.v1.get_current_user()
        if user is None:
            raise CLIException("current user is not found")

        org = cloud_rest_client.v1.get_current_organization()
        if org is None:
            raise CLIException("current organization is not found")

        # Prefer the context configured in the container; otherwise reuse the
        # name of the currently selected context.
        current_context_name = CloudClientConfig.get_config().current_context_name
        cloud_context = BentoMLContainer.cloud_context.get()

        ctx = CloudClientContext(
            name=cloud_context if cloud_context is not None else current_context_name,
            endpoint=endpoint,
            api_token=api_token,
            email=user.email,
        )
        ctx.save()

        rich.print(
            f":white_check_mark: Configured BentoCloud credentials (current-context: {ctx.name})"
        )
        rich.print(
            f":white_check_mark: Logged in as [blue]{user.email}[/] at [blue]{org.name}[/] organization"
        )
    except CloudRESTApiClientError as e:
        if e.error_code == 401:
            rich.print(
                f":police_car_light: Error validating token: HTTP 401: Bad credentials ({endpoint}/api-token)",
                file=sys.stderr,
            )
        else:
            rich.print(
                f":police_car_light: Error validating token: HTTP {e.error_code}",
                file=sys.stderr,
            )
        # Returning normally here would make a failed login exit with
        # status 0.
        raise SystemExit(1)
# Prints the currently selected cloud client context as JSON. The docstring
# is the command's help text.
@cloud_command.command()
def current_context() -> None:  # type: ignore
    """Get current cloud context."""
    active_context = CloudClientConfig.get_config().get_context()
    payload = bentoml_cattr.unstructure(active_context)
    rich.print_json(data=payload)
# Prints the names of all configured cloud client contexts as a JSON list.
# The docstring is the command's help text.
@cloud_command.command()
def list_context() -> None:  # type: ignore
    """List all available context."""
    client_config = CloudClientConfig.get_config()
    context_names = [entry.name for entry in client_config.contexts]
    rich.print_json(data=bentoml_cattr.unstructure(context_names))
# Makes CONTEXT_NAME the current cloud client context and reports the name of
# the context that was selected. The docstring is the command's help text.
@cloud_command.command()
@click.argument("context_name", type=click.STRING)
def update_current_context(context_name: str) -> None:  # type: ignore
    """Update current context"""
    client_config = CloudClientConfig.get_config()
    selected = client_config.set_current_context(context_name)
    rich.print(f"Successfully switched to context: {selected.name}")
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
View file @
df51a622
...
@@ -484,7 +484,7 @@ def serve_http(
...
@@ -484,7 +484,7 @@ def serve_http(
hasattr
(
svc
,
"is_dynamo_component"
)
hasattr
(
svc
,
"is_dynamo_component"
)
and
svc
.
is_dynamo_component
()
and
svc
.
is_dynamo_component
()
)
)
else
'
Starting
production %s BentoServer from "%s"
(Press CTRL+C to quit)
'
else
"
Starting
%s
(Press CTRL+C to quit)
"
),
),
*
(
*
(
(
svc
.
name
,
*
svc
.
dynamo_address
(),
scheme
,
log_host
,
port
)
(
svc
.
name
,
*
svc
.
dynamo_address
(),
scheme
,
log_host
,
port
)
...
@@ -492,7 +492,7 @@ def serve_http(
...
@@ -492,7 +492,7 @@ def serve_http(
hasattr
(
svc
,
"is_dynamo_component"
)
hasattr
(
svc
,
"is_dynamo_component"
)
and
svc
.
is_dynamo_component
()
and
svc
.
is_dynamo_component
()
)
)
else
(
scheme
.
upper
(),
bento_identifier
)
else
(
bento_identifier
,
)
),
),
),
),
)
)
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/start.py
View file @
df51a622
...
@@ -19,24 +19,32 @@ import json
...
@@ -19,24 +19,32 @@ import json
import
logging
import
logging
import
os
import
os
import
sys
import
sys
import
typing
as
t
from
typing
import
Optional
from
typing
import
Optional
from
urllib.parse
import
urlparse
from
urllib.parse
import
urlparse
import
click
import
click
import
rich
import
rich
import
yaml
from
dynamo.sdk.cli.serve
import
_parse_service_args
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
def
build_start_command
()
->
click
.
Group
:
def
build_start_command
()
->
click
.
Group
:
from
bentoml._internal.utils
import
add_experimental_docstring
from
bentoml._internal.utils
import
add_experimental_docstring
from
bentoml_cli.utils
import
BentoMLCommandGroup
@
click
.
group
(
name
=
"start"
,
cls
=
BentoMLCommandGroup
)
@
click
.
group
(
name
=
"start"
)
def
cli
():
def
cli
():
pass
pass
@
cli
.
command
()
@
cli
.
command
(
context_settings
=
dict
(
ignore_unknown_options
=
True
,
allow_extra_args
=
True
,
),
)
@
click
.
argument
(
"bento"
,
type
=
click
.
STRING
,
default
=
"."
)
@
click
.
argument
(
"bento"
,
type
=
click
.
STRING
,
default
=
"."
)
@
click
.
option
(
@
click
.
option
(
"--service-name"
,
"--service-name"
,
...
@@ -46,6 +54,12 @@ def build_start_command() -> click.Group:
...
@@ -46,6 +54,12 @@ def build_start_command() -> click.Group:
envvar
=
"BENTOML_SERVE_SERVICE_NAME"
,
envvar
=
"BENTOML_SERVE_SERVICE_NAME"
,
help
=
"specify the runner name to serve"
,
help
=
"specify the runner name to serve"
,
)
)
@
click
.
option
(
"-f"
,
"--file"
,
type
=
click
.
Path
(
exists
=
True
),
help
=
"Path to YAML config file for service configuration"
,
)
@
click
.
option
(
@
click
.
option
(
"--depends"
,
"--depends"
,
type
=
click
.
STRING
,
type
=
click
.
STRING
,
...
@@ -137,15 +151,24 @@ def build_start_command() -> click.Group:
...
@@ -137,15 +151,24 @@ def build_start_command() -> click.Group:
help
=
"Reload Service when code changes detected"
,
help
=
"Reload Service when code changes detected"
,
default
=
False
,
default
=
False
,
)
)
@
click
.
option
(
"--dry-run"
,
is_flag
=
True
,
help
=
"Print the final service configuration and exit without starting the server"
,
default
=
False
,
)
@
add_experimental_docstring
@
add_experimental_docstring
def
start
(
def
start
(
ctx
:
click
.
Context
,
bento
:
str
,
bento
:
str
,
service_name
:
str
,
service_name
:
str
,
dry_run
:
bool
,
depends
:
Optional
[
list
[
str
]],
depends
:
Optional
[
list
[
str
]],
runner_map
:
Optional
[
str
],
runner_map
:
Optional
[
str
],
bind
:
Optional
[
str
],
bind
:
Optional
[
str
],
port
:
Optional
[
int
],
port
:
Optional
[
int
],
host
:
Optional
[
str
],
host
:
Optional
[
str
],
file
:
str
|
None
,
backlog
:
Optional
[
int
],
backlog
:
Optional
[
int
],
working_dir
:
Optional
[
str
],
working_dir
:
Optional
[
str
],
api_workers
:
Optional
[
int
],
api_workers
:
Optional
[
int
],
...
@@ -162,11 +185,44 @@ def build_start_command() -> click.Group:
...
@@ -162,11 +185,44 @@ def build_start_command() -> click.Group:
reload
:
bool
=
False
,
reload
:
bool
=
False
,
)
->
None
:
)
->
None
:
"""
"""
Start a
HTTP API server standalon
e. This will be used inside Yatai.
Start a
single Dynamo servic
e. This will be used inside Yatai.
"""
"""
from
bentoml
import
Service
from
bentoml
import
Service
from
bentoml._internal.service.loader
import
load
from
bentoml._internal.service.loader
import
load
service_configs
:
dict
[
str
,
dict
[
str
,
t
.
Any
]]
=
{}
# Load file if provided
if
file
:
with
open
(
file
)
as
f
:
yaml_configs
=
yaml
.
safe_load
(
f
)
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for
service
,
configs
in
yaml_configs
.
items
():
for
key
,
value
in
configs
.
items
():
if
service
not
in
service_configs
:
service_configs
[
service
]
=
{}
service_configs
[
service
][
key
]
=
value
# Process service-specific options
cmdline_overrides
:
t
.
Dict
[
str
,
t
.
Any
]
=
_parse_service_args
(
ctx
.
args
)
for
service
,
configs
in
cmdline_overrides
.
items
():
for
key
,
value
in
configs
.
items
():
if
service
not
in
service_configs
:
service_configs
[
service
]
=
{}
service_configs
[
service
][
key
]
=
value
if
dry_run
:
rich
.
print
(
"[bold]Service Configuration:[/bold]"
)
rich
.
print
(
json
.
dumps
(
service_configs
,
indent
=
2
))
rich
.
print
(
"
\n
[bold]Environment Variable that would be set:[/bold]"
)
rich
.
print
(
f
"DYNAMO_SERVICE_CONFIG=
{
json
.
dumps
(
service_configs
)
}
"
)
sys
.
exit
(
0
)
# Set environment variable with service configuration
if
service_configs
:
os
.
environ
[
"DYNAMO_SERVICE_CONFIG"
]
=
json
.
dumps
(
service_configs
)
if
working_dir
is
None
:
if
working_dir
is
None
:
if
os
.
path
.
isdir
(
os
.
path
.
expanduser
(
bento
)):
if
os
.
path
.
isdir
(
os
.
path
.
expanduser
(
bento
)):
working_dir
=
os
.
path
.
expanduser
(
bento
)
working_dir
=
os
.
path
.
expanduser
(
bento
)
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
0 → 100644
View file @
df51a622
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
typing
as
t
import
click
from
click
import
Command
,
Context
class DynamoCommandGroup(click.Group):
    """Simplified version of BentoMLCommandGroup for Dynamo CLI.

    A ``click.Group`` that additionally understands command aliases: any
    command object exposing a non-empty ``aliases`` attribute can be looked
    up through ``get_command`` by either its registered name or an alias.
    """

    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
        """Create the group.

        Args:
            *args: Positional arguments forwarded to ``click.Group``.
            **kwargs: Keyword arguments forwarded to ``click.Group``. An
                optional ``aliases`` entry is removed and stored on
                ``self.aliases`` (defaults to an empty list).
        """
        # Popped before delegating so it is not forwarded to click.Group.
        self.aliases = kwargs.pop("aliases", [])
        super().__init__(*args, **kwargs)
        # Registered command name -> aliases declared by that command.
        self._commands: dict[str, list[str]] = {}
        # Alias -> registered command name.
        self._aliases: dict[str, str] = {}

    def add_command(self, cmd: Command, name: str | None = None) -> None:
        """Register ``cmd`` and record any aliases it declares.

        Args:
            cmd: Command to register. Its ``context_settings`` are updated
                in place with ``max_content_width = 120``.
            name: Optional name to register the command under; when omitted
                the command's own name is used.
        """
        cmd.context_settings["max_content_width"] = 120

        # Register first so the alias tables are only touched for commands
        # the parent group actually accepted.
        super().add_command(cmd, name)

        aliases = getattr(cmd, "aliases", None)
        # Key aliases by the name the command is registered under, which is
        # the explicit ``name`` when given, so resolve_alias() yields a key
        # that get_command() can find.
        registered_name = name or cmd.name
        if aliases and registered_name:
            self._commands[registered_name] = aliases
            self._aliases.update(dict.fromkeys(aliases, registered_name))

    def add_subcommands(self, group: click.Group) -> None:
        """Copy every command of ``group`` into this group.

        Args:
            group: Source group whose commands are added.

        Raises:
            TypeError: If ``group`` is not a ``click.MultiCommand``.
        """
        if not isinstance(group, click.MultiCommand):
            raise TypeError(
                "DynamoCommandGroup.add_subcommands only accepts click.MultiCommand"
            )
        if isinstance(group, DynamoCommandGroup):
            # Common wrappers are already applied, call the super() method
            # and merge the source group's alias tables wholesale.
            for name, cmd in group.commands.items():
                super().add_command(cmd, name)
            self._commands.update(group._commands)
            self._aliases.update(group._aliases)
        else:
            for name, cmd in group.commands.items():
                self.add_command(cmd, name)

    def resolve_alias(self, cmd_name: str) -> str:
        """Return the registered name for ``cmd_name``.

        Names that are not known aliases are returned unchanged.
        """
        return self._aliases.get(cmd_name, cmd_name)

    def get_command(self, ctx: Context, cmd_name: str) -> Command | None:
        """Look up a command by registered name or alias.

        Returns:
            Whatever ``click.Group.get_command`` returns for the resolved
            name.
        """
        return super().get_command(ctx, self.resolve_alias(cmd_name))

    def add_single_command(self, group: click.Group, command_name: str) -> None:
        """Add a single command from a group by name.

        Args:
            group: Group to take the command from.
            command_name: Name of the command inside ``group``; it is also
                the name the command is registered under in this group.

        Raises:
            TypeError: If ``group`` is not a ``click.MultiCommand``.
            ValueError: If ``group`` has no command called ``command_name``.
        """
        if not isinstance(group, click.MultiCommand):
            raise TypeError("Only accepts click.MultiCommand")

        # get_command() takes a context argument; build one for the lookup.
        ctx = click.Context(group)
        cmd = group.get_command(ctx, command_name)
        if cmd is None:
            raise ValueError(f"Command '{command_name}' not found in group")

        self.add_command(cmd, command_name)
deploy/dynamo/sdk/src/dynamo/sdk/lib/dependency.py
View file @
df51a622
...
@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]):
...
@@ -139,7 +139,7 @@ class DynamoDependency(Dependency[T]):
...
...
await dep.get_endpoint("generate") # equivalent to the following
await dep.get_endpoint("generate") # equivalent to the following
router_client = (
router_client = (
await runtime.namespace("dynamo
-init
")
await runtime.namespace("dynamo")
.component("router")
.component("router")
.endpoint("generate")
.endpoint("generate")
.client()
.client()
...
...
deploy/examples/llm/README.md
View file @
df51a622
...
@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar
...
@@ -27,7 +27,7 @@ This directory contains examples and reference implementations for deploying Lar
## Deployment Architectures
## Deployment Architectures
###
Monolith
###
Aggregated
Single-instance deployment where both prefill and decode are done by the same worker.
Single-instance deployment where both prefill and decode are done by the same worker.
### Disaggregated
### Disaggregated
...
@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy:
...
@@ -83,34 +83,28 @@ This figure shows an overview of the major components to deploy:
### Example architectures
### Example architectures
####
Router based worker
####
Aggregated serving
```
bash
```
bash
cd
/workspace/deploy/examples/llm
cd
/workspace/deploy/examples/llm
dynamo serve
monolith.router_based_deployment:Frontend
-f
./configs/monolith/router_based_deployment
.yaml
dynamo serve
graphs.agg:Frontend
-f
./configs/agg
.yaml
```
```
####
Routerless monolith
####
Aggregated serving with KV Routing
```
bash
```
bash
cd
/workspace/deploy/examples/llm
cd
/workspace/deploy/examples/llm
dynamo serve
monolith.routerless_deployment:Frontend
-f
./configs/monolith/routerless_deployment
.yaml
dynamo serve
graphs.agg_router:Frontend
-f
./configs/agg_router
.yaml
```
```
#### Routerless processor based monolith
#### Disaggregated serving
```
bash
dynamo serve monolith.routerless_processor_deployment:Frontend
-f
./configs/monolith/routerless_processor_deployment.yaml
```
#### Router based disaggregated serving
```
bash
```
bash
cd
/workspace/deploy/examples/llm
cd
/workspace/deploy/examples/llm
dynamo serve
disaggregated.router_based_deployment:Frontend
-f
./configs/disaggregated/router_based_deployment
.yaml
dynamo serve
graphs.disagg:Frontend
-f
./configs/disagg
.yaml
```
```
####
Routerless d
isaggregated serving
####
D
isaggregated serving
with KV Routing
```
bash
```
bash
cd
/workspace/deploy/examples/llm
cd
/workspace/deploy/examples/llm
dynamo serve
disaggregated.routerless_deployment:Frontend
-f
./configs/disaggregated/routerless_deployment
.yaml
dynamo serve
graphs.disagg_router:Frontend
-f
./configs/disagg_router
.yaml
```
```
### Client
### Client
...
...
deploy/examples/llm/components/kv_router.py
View file @
df51a622
...
@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace:
...
@@ -67,7 +67,7 @@ def parse_args(service_name, prefix) -> Namespace:
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
@@ -87,7 +87,7 @@ class Router:
...
@@ -87,7 +87,7 @@ class Router:
async
def
async_init
(
self
):
async
def
async_init
(
self
):
self
.
runtime
=
dynamo_context
[
"runtime"
]
self
.
runtime
=
dynamo_context
[
"runtime"
]
self
.
workers_client
=
(
self
.
workers_client
=
(
await
self
.
runtime
.
namespace
(
"dynamo
-init
"
)
await
self
.
runtime
.
namespace
(
"dynamo"
)
.
component
(
"VllmWorker"
)
.
component
(
"VllmWorker"
)
.
endpoint
(
"generate"
)
.
endpoint
(
"generate"
)
.
client
()
.
client
()
...
@@ -101,7 +101,7 @@ class Router:
...
@@ -101,7 +101,7 @@ class Router:
)
)
await
asyncio
.
sleep
(
2
)
await
asyncio
.
sleep
(
2
)
kv_listener
=
self
.
runtime
.
namespace
(
"dynamo
-init
"
).
component
(
"VllmWorker"
)
kv_listener
=
self
.
runtime
.
namespace
(
"dynamo"
).
component
(
"VllmWorker"
)
await
kv_listener
.
create_service
()
await
kv_listener
.
create_service
()
self
.
indexer
=
KvIndexer
(
kv_listener
,
self
.
args
.
block_size
)
self
.
indexer
=
KvIndexer
(
kv_listener
,
self
.
args
.
block_size
)
self
.
metrics_aggregator
=
KvMetricsAggregator
(
kv_listener
)
self
.
metrics_aggregator
=
KvMetricsAggregator
(
kv_listener
)
...
...
deploy/examples/llm/components/prefill_worker.py
View file @
df51a622
...
@@ -44,7 +44,7 @@ class RequestType(BaseModel):
...
@@ -44,7 +44,7 @@ class RequestType(BaseModel):
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
@@ -89,7 +89,7 @@ class PrefillWorker:
...
@@ -89,7 +89,7 @@ class PrefillWorker:
raise
RuntimeError
(
"Failed to initialize engine client"
)
raise
RuntimeError
(
"Failed to initialize engine client"
)
runtime
=
dynamo_context
[
"runtime"
]
runtime
=
dynamo_context
[
"runtime"
]
metadata
=
self
.
engine_client
.
nixl_metadata
metadata
=
self
.
engine_client
.
nixl_metadata
self
.
_metadata_store
=
NixlMetadataStore
(
"dynamo
-init
"
,
runtime
)
self
.
_metadata_store
=
NixlMetadataStore
(
"dynamo"
,
runtime
)
await
self
.
_metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
await
self
.
_metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
task
=
asyncio
.
create_task
(
self
.
prefill_queue_handler
())
task
=
asyncio
.
create_task
(
self
.
prefill_queue_handler
())
task
.
add_done_callback
(
lambda
_
:
print
(
"prefill queue handler created"
))
task
.
add_done_callback
(
lambda
_
:
print
(
"prefill queue handler created"
))
...
...
deploy/examples/llm/components/processor.py
View file @
df51a622
...
@@ -41,7 +41,7 @@ class RequestType(Enum):
...
@@ -41,7 +41,7 @@ class RequestType(Enum):
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
...
deploy/examples/llm/components/routerless/prefill_worker.py
View file @
df51a622
...
@@ -37,7 +37,7 @@ from dynamo.sdk import (
...
@@ -37,7 +37,7 @@ from dynamo.sdk import (
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess:
...
@@ -82,7 +82,7 @@ class PrefillWorkerRouterLess:
raise
RuntimeError
(
"Failed to initialize engine client"
)
raise
RuntimeError
(
"Failed to initialize engine client"
)
runtime
=
dynamo_context
[
"runtime"
]
runtime
=
dynamo_context
[
"runtime"
]
metadata
=
self
.
engine_client
.
nixl_metadata
metadata
=
self
.
engine_client
.
nixl_metadata
self
.
_metadata_store
=
NixlMetadataStore
(
"dynamo
-init
"
,
runtime
)
self
.
_metadata_store
=
NixlMetadataStore
(
"dynamo"
,
runtime
)
await
self
.
_metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
await
self
.
_metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
@
dynamo_endpoint
()
@
dynamo_endpoint
()
...
...
deploy/examples/llm/components/routerless/worker.py
View file @
df51a622
...
@@ -41,7 +41,7 @@ from dynamo.sdk import (
...
@@ -41,7 +41,7 @@ from dynamo.sdk import (
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
@@ -87,7 +87,7 @@ class VllmWorkerRouterLess:
...
@@ -87,7 +87,7 @@ class VllmWorkerRouterLess:
runtime
=
dynamo_context
[
"runtime"
]
runtime
=
dynamo_context
[
"runtime"
]
if
self
.
engine_args
.
remote_prefill
:
if
self
.
engine_args
.
remote_prefill
:
metadata
=
self
.
engine_client
.
nixl_metadata
metadata
=
self
.
engine_client
.
nixl_metadata
metadata_store
=
NixlMetadataStore
(
"dynamo
-init
"
,
runtime
)
metadata_store
=
NixlMetadataStore
(
"dynamo"
,
runtime
)
await
metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
await
metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
models
=
OpenAIServingModels
(
models
=
OpenAIServingModels
(
...
...
deploy/examples/llm/components/worker.py
View file @
df51a622
...
@@ -44,7 +44,7 @@ from dynamo.sdk import (
...
@@ -44,7 +44,7 @@ from dynamo.sdk import (
@
service
(
@
service
(
dynamo
=
{
dynamo
=
{
"enabled"
:
True
,
"enabled"
:
True
,
"namespace"
:
"dynamo
-init
"
,
"namespace"
:
"dynamo"
,
},
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
workers
=
1
,
...
@@ -87,7 +87,7 @@ class VllmWorker:
...
@@ -87,7 +87,7 @@ class VllmWorker:
if
self
.
engine_args
.
router
==
"kv"
:
if
self
.
engine_args
.
router
==
"kv"
:
VLLM_WORKER_ID
=
dynamo_context
[
"endpoints"
][
0
].
lease_id
()
VLLM_WORKER_ID
=
dynamo_context
[
"endpoints"
][
0
].
lease_id
()
os
.
environ
[
"VLLM_WORKER_ID"
]
=
str
(
VLLM_WORKER_ID
)
os
.
environ
[
"VLLM_WORKER_ID"
]
=
str
(
VLLM_WORKER_ID
)
os
.
environ
[
"VLLM_KV_NAMESPACE"
]
=
"dynamo
-init
"
os
.
environ
[
"VLLM_KV_NAMESPACE"
]
=
"dynamo"
os
.
environ
[
"VLLM_KV_COMPONENT"
]
=
class_name
os
.
environ
[
"VLLM_KV_COMPONENT"
]
=
class_name
vllm_logger
.
info
(
f
"Generate endpoint ID:
{
VLLM_WORKER_ID
}
"
)
vllm_logger
.
info
(
f
"Generate endpoint ID:
{
VLLM_WORKER_ID
}
"
)
# note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based
# note: worker_index is 1-based, but CUDA_VISIBLE_DEVICES is 0-based
...
@@ -131,7 +131,7 @@ class VllmWorker:
...
@@ -131,7 +131,7 @@ class VllmWorker:
if
self
.
engine_args
.
remote_prefill
:
if
self
.
engine_args
.
remote_prefill
:
metadata
=
self
.
engine_client
.
nixl_metadata
metadata
=
self
.
engine_client
.
nixl_metadata
metadata_store
=
NixlMetadataStore
(
"dynamo
-init
"
,
runtime
)
metadata_store
=
NixlMetadataStore
(
"dynamo"
,
runtime
)
await
metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
await
metadata_store
.
put
(
metadata
.
engine_id
,
metadata
)
if
self
.
engine_args
.
conditional_disagg
:
if
self
.
engine_args
.
conditional_disagg
:
...
...
deploy/examples/llm/configs/
monolith/routerless_processor_deployment
.yaml
→
deploy/examples/llm/configs/
agg
.yaml
View file @
df51a622
...
@@ -15,14 +15,14 @@
...
@@ -15,14 +15,14 @@
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo
-init
.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
Processor
:
Processor
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size
:
64
block-size
:
64
max-model-len
:
16384
max-model-len
:
16384
router
:
r
andom
router
:
r
ound-robin
VllmWorker
:
VllmWorker
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
...
...
deploy/examples/llm/configs/
monolith/router_based_deployment
.yaml
→
deploy/examples/llm/configs/
agg_router
.yaml
View file @
df51a622
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo
-init
.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
Processor
:
Processor
:
...
@@ -39,6 +39,4 @@ VllmWorker:
...
@@ -39,6 +39,4 @@ VllmWorker:
router
:
kv
router
:
kv
tensor-parallel-size
:
1
tensor-parallel-size
:
1
ServiceArgs
:
ServiceArgs
:
workers
:
2
workers
:
1
envs
:
-
CUDA_VISIBLE_DEVICES
:
'
0,1'
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment