Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3136b716
Commit
3136b716
authored
Apr 10, 2025
by
ishandhanani
Committed by
GitHub
Apr 10, 2025
Browse files
feat: save `dynamo serve` local state for planner and cleanups (#560)
parent
07f2f0ad
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
145 additions
and
80 deletions
+145
-80
deploy/dynamo/sdk/src/dynamo/sdk/cli/allocator.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/allocator.py
+5
-5
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
+8
-0
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
+99
-75
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
+33
-0
No files found.
deploy/dynamo/sdk/src/dynamo/sdk/cli/allocator.py
View file @
3136b716
...
...
@@ -28,8 +28,8 @@ from bentoml.exceptions import BentoMLConfigException
from
simple_di
import
Provide
,
inject
NVIDIA_GPU
=
"nvidia.com/gpu"
DISABLE_GPU_ALLOCATION
_ENV
=
"DYN
AMO
_DISABLE_GPU_ALLOCATION"
DYN
AMO
_DEPLOYMENT_ENV
=
"DYN
AMO
_DEPLOYMENT_ENV"
DYN_
DISABLE_
AUTO_
GPU_ALLOCATION
=
"DYN_DISABLE_
AUTO_
GPU_ALLOCATION"
DYN_DEPLOYMENT_ENV
=
"DYN_DEPLOYMENT_ENV"
class
ResourceAllocator
:
...
...
@@ -45,7 +45,7 @@ class ResourceAllocator:
if
count
>
self
.
remaining_gpus
:
warnings
.
warn
(
f
"Requested
{
count
}
GPUs, but only
{
self
.
remaining_gpus
}
are remaining. "
f
"Serving may fail due to inadequate GPUs. Set
{
DISABLE_GPU_ALLOCATION
_ENV
}
=1 "
f
"Serving may fail due to inadequate GPUs. Set
{
DYN_
DISABLE_
AUTO_
GPU_ALLOCATION
}
=1 "
"to disable automatic allocation and allocate GPUs manually."
,
ResourceWarning
,
stacklevel
=
3
,
...
...
@@ -117,8 +117,8 @@ class ResourceAllocator:
return
num_workers
,
resource_envs
else
:
# workers is a number
num_workers
=
workers
if
num_gpus
and
DISABLE_GPU_ALLOCATION
_ENV
not
in
os
.
environ
:
if
os
.
environ
.
get
(
DYN
AMO
_DEPLOYMENT_ENV
):
if
num_gpus
and
DYN_
DISABLE_
AUTO_
GPU_ALLOCATION
not
in
os
.
environ
:
if
os
.
environ
.
get
(
DYN_DEPLOYMENT_ENV
):
# K8s replicas: Assumes DYNAMO_DEPLOYMENT_ENV is set
# each pod in replicaset will have separate GPU with same CUDA_VISIBLE_DEVICES
assigned
=
self
.
assign_gpus
(
num_gpus
)
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/serve.py
View file @
3136b716
...
...
@@ -189,6 +189,12 @@ def build_serve_command() -> click.Group:
help
=
"Print the final service configuration and exit without starting the server"
,
default
=
False
,
)
@
click
.
option
(
"--enable-planner"
,
is_flag
=
True
,
help
=
"Save a snapshot of your service state to a file that allows planner to edit your deployment configuration"
,
default
=
False
,
)
@
click
.
pass_context
def
serve
(
ctx
:
click
.
Context
,
...
...
@@ -200,6 +206,7 @@ def build_serve_command() -> click.Group:
host
:
str
,
file
:
str
|
None
,
working_dir
:
str
|
None
,
enable_planner
:
bool
,
**
attrs
:
t
.
Any
,
)
->
None
:
"""Locally run connected Dynamo services. You can pass service-specific configuration options using --ServiceName.param=value format."""
...
...
@@ -270,6 +277,7 @@ def build_serve_command() -> click.Group:
port
=
port
,
dependency_map
=
runner_map_dict
,
service_name
=
service_name
,
enable_planner
=
enable_planner
,
)
return
cli
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/serving.py
View file @
3136b716
...
...
@@ -44,7 +44,12 @@ from circus.watcher import Watcher
from
simple_di
import
Provide
,
inject
from
.allocator
import
ResourceAllocator
from
.utils
import
path_to_uri
,
reserve_free_port
from
.utils
import
(
DYN_LOCAL_STATE_DIR
,
path_to_uri
,
reserve_free_port
,
save_dynamo_state
,
)
# Define a Protocol for services to ensure type safety
...
...
@@ -57,6 +62,9 @@ class ServiceProtocol(Protocol):
def
is_dynamo_component
(
self
)
->
bool
:
...
def
dynamo_address
(
self
)
->
tuple
[
str
,
str
]:
...
# Use Protocol as the base for type alias
AnyService
=
TypeVar
(
"AnyService"
,
bound
=
ServiceProtocol
)
...
...
@@ -115,7 +123,37 @@ else:
# WARNING: internal
_SERVICE_WORKER_SCRIPT
=
"_bentoml_impl.worker.service"
_BENTO_WORKER_SCRIPT
=
"_bentoml_impl.worker.service"
_DYNAMO_WORKER_SCRIPT
=
"dynamo.sdk.cli.serve_dynamo"
def
_get_dynamo_worker_script
(
bento_identifier
:
str
,
svc_name
:
str
)
->
list
[
str
]:
args
=
[
"-m"
,
_DYNAMO_WORKER_SCRIPT
,
bento_identifier
,
"--service-name"
,
svc_name
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
return
args
def
_get_bento_worker_script
(
bento_identifier
:
str
,
svc_name
:
str
)
->
list
[
str
]:
args
=
[
"-m"
,
_BENTO_WORKER_SCRIPT
,
bento_identifier
,
"--service-name"
,
svc_name
,
"--fd"
,
f
"$(circus.sockets.
{
svc_name
}
)"
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
return
args
def
create_dependency_watcher
(
...
...
@@ -131,18 +169,7 @@ def create_dependency_watcher(
num_workers
,
resource_envs
=
scheduler
.
get_resource_envs
(
svc
)
uri
,
socket
=
_get_server_socket
(
svc
,
uds_path
,
port_stack
)
args
=
[
"-m"
,
_SERVICE_WORKER_SCRIPT
,
bento_identifier
,
"--service-name"
,
svc
.
name
,
"--fd"
,
f
"$(circus.sockets.
{
svc
.
name
}
)"
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
args
=
_get_bento_worker_script
(
bento_identifier
,
svc
.
name
)
if
resource_envs
:
args
.
extend
([
"--worker-env"
,
json
.
dumps
(
resource_envs
)])
...
...
@@ -168,23 +195,9 @@ def create_dynamo_watcher(
"""Create a watcher for a Dynamo service in the dependency graph"""
from
bentoml.serving
import
create_watcher
# Get socket for this service
uri
,
socket
=
_get_server_socket
(
svc
,
uds_path
,
port_stack
)
# Get worker configuration
num_workers
,
resource_envs
=
scheduler
.
get_resource_envs
(
svc
)
# Create Dynamo-specific worker args
args
=
[
"-m"
,
"dynamo.sdk.cli.serve_dynamo"
,
# Use our Dynamo worker module
bento_identifier
,
"--service-name"
,
svc
.
name
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
uri
,
socket
=
_get_server_socket
(
svc
,
uds_path
,
port_stack
)
args
=
_get_dynamo_worker_script
(
bento_identifier
,
svc
.
name
)
if
resource_envs
:
args
.
extend
([
"--worker-env"
,
json
.
dumps
(
resource_envs
)])
...
...
@@ -209,41 +222,21 @@ def create_dynamo_watcher(
except
json
.
JSONDecodeError
as
e
:
logger
.
warning
(
f
"Failed to parse DYNAMO_SERVICE_ENVS:
{
e
}
"
)
# use namespace from the service
namespace
,
_
=
svc
.
dynamo_address
()
# Create the watcher with updated environment
watcher
=
create_watcher
(
name
=
f
"
dy
nam
o_servi
ce_
{
svc
.
name
}
"
,
name
=
f
"
{
nam
espa
ce
}
_
{
svc
.
name
}
"
,
args
=
args
,
numprocesses
=
num_workers
,
working_dir
=
working_dir
,
env
=
worker_env
,
)
return
watcher
,
socket
,
uri
@
inject
def
server_on_deployment
(
svc
:
ServiceProtocol
,
result_file
:
str
=
Provide
[
BentoMLContainer
.
result_store_file
]
)
->
None
:
# Resolve models before server starts.
if
hasattr
(
svc
,
"bento"
)
and
(
bento
:
=
getattr
(
svc
,
"bento"
)):
for
model
in
bento
.
info
.
all_models
:
model
.
to_model
().
resolve
()
elif
hasattr
(
svc
,
"models"
):
for
model
in
svc
.
models
:
model
.
resolve
()
logger
.
info
(
f
"Created watcher for
{
svc
.
name
}
's in the
{
namespace
}
namespace"
)
if
hasattr
(
svc
,
"inner"
):
inner
=
svc
.
inner
for
name
in
dir
(
inner
):
member
=
getattr
(
inner
,
name
)
if
callable
(
member
)
and
getattr
(
member
,
"__bentoml_deployment_hook__"
,
False
):
member
()
if
os
.
path
.
exists
(
result_file
):
os
.
remove
(
result_file
)
return
watcher
,
socket
,
uri
@
inject
(
squeeze_none
=
True
)
...
...
@@ -254,6 +247,7 @@ def serve_http(
port
:
int
=
Provide
[
BentoMLContainer
.
http
.
port
],
dependency_map
:
dict
[
str
,
str
]
|
None
=
None
,
service_name
:
str
=
""
,
enable_planner
:
bool
=
False
,
)
->
Server
:
# WARNING: internal
from
_bentoml_impl.loader
import
load
...
...
@@ -263,9 +257,14 @@ def serve_http(
from
bentoml.serving
import
create_watcher
from
circus.sockets
import
CircusSocket
from
dynamo.sdk.lib.logging
import
configure_server_logging
from
.allocator
import
ResourceAllocator
configure_server_logging
()
bento_id
:
str
=
""
namespace
:
str
=
""
env
:
dict
[
str
,
Any
]
=
{}
if
isinstance
(
bento_identifier
,
Service
):
svc
=
bento_identifier
...
...
@@ -296,7 +295,6 @@ def serve_http(
if
service_name
and
service_name
!=
svc
.
name
:
svc
=
svc
.
find_dependent_by_name
(
service_name
)
num_workers
,
resource_envs
=
allocator
.
get_resource_envs
(
svc
)
server_on_deployment
(
svc
)
uds_path
=
tempfile
.
mkdtemp
(
prefix
=
"bentoml-uds-"
)
try
:
if
not
service_name
and
not
standalone
:
...
...
@@ -321,6 +319,7 @@ def serve_http(
str
(
bento_path
.
absolute
()),
env
=
env
,
)
namespace
,
_
=
dep_svc
.
dynamo_address
()
else
:
# Regular BentoML service
new_watcher
,
new_socket
,
uri
=
create_dependency_watcher
(
...
...
@@ -336,7 +335,6 @@ def serve_http(
watchers
.
append
(
new_watcher
)
sockets
.
append
(
new_socket
)
dependency_map
[
name
]
=
uri
server_on_deployment
(
dep_svc
)
# reserve one more to avoid conflicts
port_stack
.
enter_context
(
reserve_free_port
())
...
...
@@ -365,7 +363,7 @@ def serve_http(
server_args
=
[
"-m"
,
_
SERVICE
_WORKER_SCRIPT
,
_
BENTO
_WORKER_SCRIPT
,
bento_identifier
,
"--fd"
,
f
"$(circus.sockets.
{
API_SERVER_NAME
}
)"
,
...
...
@@ -374,6 +372,15 @@ def serve_http(
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
dynamo_args
=
[
"-m"
,
_DYNAMO_WORKER_SCRIPT
,
bento_identifier
,
"--service-name"
,
svc
.
name
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
if
resource_envs
:
server_args
.
extend
([
"--worker-env"
,
json
.
dumps
(
resource_envs
)])
...
...
@@ -381,20 +388,10 @@ def serve_http(
# Check if this is a Dynamo service
if
hasattr
(
svc
,
"is_dynamo_component"
)
and
svc
.
is_dynamo_component
():
# Create Dynamo-specific watcher using existing socket
args
=
[
"-m"
,
"dynamo.sdk.cli.serve_dynamo"
,
# Use our Dynamo worker module
bento_identifier
,
"--service-name"
,
svc
.
name
,
"--worker-id"
,
"$(CIRCUS.WID)"
,
]
# resource_envs is the resource allocation (ie CUDA_VISIBLE_DEVICES) for each worker created by the allocator
# these resource_envs are passed to each individual worker's environment which is set in serve_dynamo
if
resource_envs
:
args
.
extend
([
"--worker-env"
,
json
.
dumps
(
resource_envs
)])
dynamo_
args
.
extend
([
"--worker-env"
,
json
.
dumps
(
resource_envs
)])
# env is the base bentoml environment variables. We make a copy and update it to add any service configurations and additional env vars
worker_env
=
env
.
copy
()
if
env
else
{}
...
...
@@ -416,16 +413,17 @@ def serve_http(
logger
.
warning
(
f
"Failed to parse DYNAMO_SERVICE_ENVS:
{
e
}
"
)
watcher
=
create_watcher
(
name
=
f
"
dy
nam
o_servi
ce_
{
svc
.
name
}
"
,
args
=
args
,
name
=
f
"
{
nam
espa
ce
}
_
{
svc
.
name
}
"
,
args
=
dynamo_
args
,
numprocesses
=
num_workers
,
working_dir
=
str
(
bento_path
.
absolute
()),
env
=
worker_env
,
# Dependency map will be injected by serve_http
env
=
worker_env
,
)
watchers
.
append
(
watcher
)
logger
.
info
(
f
"dynamo_service_
{
svc
.
name
}
entrypoint created"
)
logger
.
info
(
f
"Created watcher for
{
svc
.
name
}
's in the
{
namespace
}
namespace"
)
else
:
# Create regular BentoML service watcher
watchers
.
append
(
create_watcher
(
name
=
"service"
,
...
...
@@ -455,6 +453,32 @@ def serve_http(
arbiter
=
create_standalone_arbiter
(
**
arbiter_kwargs
)
arbiter
.
exit_stack
.
callback
(
shutil
.
rmtree
,
uds_path
,
ignore_errors
=
True
)
if
enable_planner
:
arbiter
.
exit_stack
.
callback
(
shutil
.
rmtree
,
os
.
environ
.
get
(
DYN_LOCAL_STATE_DIR
,
os
.
path
.
expanduser
(
"~/.dynamo/state"
)
),
ignore_errors
=
True
,
)
logger
.
warn
(
f
"arbiter:
{
arbiter
.
endpoint
}
"
)
# save deployment state for planner
if
not
namespace
:
raise
ValueError
(
"No namespace found for service"
)
save_dynamo_state
(
namespace
,
arbiter
.
endpoint
,
components
=
{
watcher
.
name
:
{
"watcher_name"
:
watcher
.
name
,
"cmd"
:
watcher
.
cmd
+
" "
.
join
(
watcher
.
args
),
}
for
watcher
in
watchers
},
environment
=
{
"DYNAMO_SERVICE_CONFIG"
:
os
.
environ
[
"DYNAMO_SERVICE_CONFIG"
],
},
)
arbiter
.
start
(
cb
=
lambda
_
:
logger
.
info
(
# type: ignore
(
...
...
deploy/dynamo/sdk/src/dynamo/sdk/cli/utils.py
View file @
3136b716
...
...
@@ -16,6 +16,8 @@
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
import
contextlib
import
json
import
logging
import
os
import
pathlib
import
random
...
...
@@ -26,6 +28,10 @@ import click
import
psutil
from
click
import
Command
,
Context
logger
=
logging
.
getLogger
(
__name__
)
DYN_LOCAL_STATE_DIR
=
"DYN_LOCAL_STATE_DIR"
class
DynamoCommandGroup
(
click
.
Group
):
"""Simplified version of BentoMLCommandGroup for Dynamo CLI"""
...
...
@@ -149,3 +155,30 @@ def path_to_uri(path: str) -> str:
if
psutil
.
POSIX
:
return
pathlib
.
PurePosixPath
(
path
).
as_uri
()
raise
ValueError
(
"Unsupported OS"
)
def
save_dynamo_state
(
namespace
:
str
,
circus_endpoint
:
str
,
components
:
dict
[
str
,
t
.
Any
],
environment
:
dict
[
str
,
t
.
Any
],
):
state_dir
=
os
.
environ
.
get
(
DYN_LOCAL_STATE_DIR
,
os
.
path
.
expanduser
(
"~/.dynamo/state"
)
)
os
.
makedirs
(
state_dir
,
exist_ok
=
True
)
# create the state object
state
=
{
"namespace"
:
namespace
,
"circus_endpoint"
:
circus_endpoint
,
"components"
:
components
,
"environment"
:
environment
,
}
# save the state object to a file
state_file
=
os
.
path
.
join
(
state_dir
,
f
"
{
namespace
}
.json"
)
with
open
(
state_file
,
"w"
)
as
f
:
json
.
dump
(
state
,
f
)
logger
.
warning
(
f
"Saved state to
{
state_file
}
"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment