Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d675d221
Unverified
Commit
d675d221
authored
May 08, 2025
by
Biswa Panda
Committed by
GitHub
May 09, 2025
Browse files
feat: decouple dynamo sdk to support multiple deployment targets (#905)
parent
5d5235bc
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
28 additions
and
91 deletions
+28
-91
deploy/sdk/src/dynamo/sdk/lib/service.py
deploy/sdk/src/dynamo/sdk/lib/service.py
+4
-35
deploy/sdk/src/dynamo/sdk/tests/pipeline.py
deploy/sdk/src/dynamo/sdk/tests/pipeline.py
+5
-10
deploy/sdk/src/dynamo/sdk/tests/test_e2e.py
deploy/sdk/src/dynamo/sdk/tests/test_e2e.py
+2
-2
deploy/sdk/src/dynamo/sdk/tests/test_link.py
deploy/sdk/src/dynamo/sdk/tests/test_link.py
+2
-2
docs/backend.md
docs/backend.md
+0
-7
examples/hello_world/disagg_skeleton/components/frontend.py
examples/hello_world/disagg_skeleton/components/frontend.py
+3
-4
examples/hello_world/disagg_skeleton/components/kv_router.py
examples/hello_world/disagg_skeleton/components/kv_router.py
+0
-1
examples/hello_world/hello_world.py
examples/hello_world/hello_world.py
+10
-13
examples/llm/components/frontend.py
examples/llm/components/frontend.py
+0
-4
examples/llm/components/kv_router.py
examples/llm/components/kv_router.py
+0
-1
examples/llm/components/planner_service.py
examples/llm/components/planner_service.py
+0
-1
examples/llm/components/prefill_worker.py
examples/llm/components/prefill_worker.py
+0
-1
examples/llm/components/processor.py
examples/llm/components/processor.py
+0
-1
examples/llm/components/worker.py
examples/llm/components/worker.py
+0
-1
examples/multimodal/components/encode_worker.py
examples/multimodal/components/encode_worker.py
+0
-1
examples/multimodal/components/frontend.py
examples/multimodal/components/frontend.py
+2
-3
examples/multimodal/components/prefill_worker.py
examples/multimodal/components/prefill_worker.py
+0
-1
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+0
-1
examples/multimodal/components/worker.py
examples/multimodal/components/worker.py
+0
-1
examples/sglang/components/frontend.py
examples/sglang/components/frontend.py
+0
-1
No files found.
deploy/sdk/src/dynamo/sdk/lib/service.py
View file @
d675d221
...
...
@@ -17,7 +17,6 @@ from __future__ import annotations
import
json
import
logging
import
os
from
collections
import
defaultdict
from
dataclasses
import
asdict
,
dataclass
from
enum
import
Enum
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Set
,
Tuple
,
TypeVar
,
Union
...
...
@@ -28,6 +27,7 @@ from _bentoml_sdk.images import Image
from
_bentoml_sdk.service.config
import
validate
from
fastapi
import
FastAPI
from
dynamo.sdk.core.protocol.interface
import
DynamoTransport
,
LinkedServices
from
dynamo.sdk.lib.decorators
import
DynamoEndpoint
T
=
TypeVar
(
"T"
,
bound
=
object
)
...
...
@@ -45,32 +45,6 @@ class ComponentType(str, Enum):
# etc.
class
RuntimeLinkedServices
:
"""
A class to track the linked services in the runtime.
"""
def
__init__
(
self
)
->
None
:
self
.
edges
:
Dict
[
DynamoService
,
Set
[
DynamoService
]]
=
defaultdict
(
set
)
def
add
(
self
,
edge
:
Tuple
[
DynamoService
,
DynamoService
]):
src
,
dest
=
edge
self
.
edges
[
src
].
add
(
dest
.
inner
)
# track the dest node as well so we can cleanup later
self
.
edges
[
dest
]
def
remove_unused_edges
(
self
):
# this method is idempotent
if
not
self
.
edges
:
return
# remove edges that are not in the current service
for
u
,
vertices
in
self
.
edges
.
items
():
u
.
remove_unused_edges
(
used_edges
=
vertices
)
LinkedServices
=
RuntimeLinkedServices
()
@
dataclass
class
DynamoConfig
:
"""Configuration for Dynamo components"""
...
...
@@ -152,7 +126,9 @@ class DynamoService(Service[T]):
value
=
getattr
(
inner
,
field
)
if
isinstance
(
value
,
DynamoEndpoint
):
self
.
_dynamo_endpoints
[
value
.
name
]
=
value
if
getattr
(
value
,
"is_api"
,
False
):
if
DynamoTransport
.
HTTP
in
getattr
(
value
,
"_transports"
,
[
DynamoTransport
.
DEFAULT
]
):
# Ensure endpoint path starts with '/'
path
=
(
value
.
name
if
value
.
name
.
startswith
(
"/"
)
else
f
"/
{
value
.
name
}
"
...
...
@@ -174,15 +150,8 @@ class DynamoService(Service[T]):
return
service_config
.
get
(
"ServiceArgs"
)
return
None
def
is_dynamo_component
(
self
)
->
bool
:
"""Check if this service is configured as a Dynamo component"""
return
self
.
_dynamo_config
.
enabled
def
dynamo_address
(
self
)
->
Tuple
[
Optional
[
str
],
Optional
[
str
]]:
"""Get the Dynamo address for this component in namespace/name format"""
if
not
self
.
is_dynamo_component
():
raise
ValueError
(
"Service is not configured as a Dynamo component"
)
# Check if we have a runner map with Dynamo address
runner_map
=
os
.
environ
.
get
(
"BENTOML_RUNNER_MAP"
)
if
runner_map
:
...
...
deploy/sdk/src/dynamo/sdk/tests/pipeline.py
View file @
d675d221
...
...
@@ -17,11 +17,11 @@
# Use this to test changes made to CLI, SDK, etc
from
fastapi
import
FastAPI
from
fastapi.responses
import
StreamingResponse
from
pydantic
import
BaseModel
from
dynamo.sdk
import
depends
,
dynamo_endpoint
,
service
from
dynamo.sdk.core.protocol.interface
import
DynamoTransport
"""
Pipeline Architecture:
...
...
@@ -56,14 +56,10 @@ class ResponseType(BaseModel):
GPU_ENABLED
=
False
app
=
FastAPI
(
title
=
"Hello World!"
)
@
service
(
resources
=
{
"cpu"
:
"1"
},
traffic
=
{
"timeout"
:
30
},
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
,
},
workers
=
1
,
...
...
@@ -94,7 +90,7 @@ class Backend:
@
service
(
resources
=
{
"cpu"
:
"2"
},
traffic
=
{
"timeout"
:
30
},
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
},
dynamo
=
{
"namespace"
:
"inference"
},
)
class
Backend2
:
backend
=
depends
(
Backend
)
...
...
@@ -116,7 +112,7 @@ class Backend2:
@
service
(
resources
=
{
"cpu"
:
"1"
},
traffic
=
{
"timeout"
:
30
},
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
},
dynamo
=
{
"namespace"
:
"inference"
},
)
class
Middle
:
backend
=
depends
(
Backend
)
...
...
@@ -150,8 +146,7 @@ class Middle:
@
service
(
resources
=
{
"cpu"
:
"1"
},
traffic
=
{
"timeout"
:
60
},
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
},
app
=
app
,
dynamo
=
{
"namespace"
:
"inference"
},
)
class
Frontend
:
middle
=
depends
(
Middle
)
...
...
@@ -160,7 +155,7 @@ class Frontend:
def
__init__
(
self
)
->
None
:
print
(
"Starting frontend"
)
@
dynamo_endpoint
(
is_api
=
True
)
@
dynamo_endpoint
(
transports
=
[
DynamoTransport
.
HTTP
]
)
async
def
generate
(
self
,
request
:
RequestType
):
"""Stream results from the pipeline."""
print
(
f
"Frontend received:
{
request
.
text
}
"
)
...
...
deploy/sdk/src/dynamo/sdk/tests/test_e2e.py
View file @
d675d221
...
...
@@ -105,8 +105,8 @@ async def test_pipeline(setup_and_teardown):
in
text
)
break
except
Exception
:
except
Exception
as
e
:
if
attempt
==
max_retries
-
1
:
raise
print
(
f
"Attempt
{
attempt
+
1
}
failed, retrying..."
)
print
(
f
"Attempt
{
attempt
+
1
}
failed, retrying...
{
e
}
"
)
await
asyncio
.
sleep
(
3
)
deploy/sdk/src/dynamo/sdk/tests/test_link.py
View file @
d675d221
...
...
@@ -15,7 +15,7 @@
import
pytest
from
dynamo.sdk.
lib.servi
ce
import
LinkedServices
from
dynamo.sdk.
core.protocol.interfa
ce
import
LinkedServices
pytestmark
=
pytest
.
mark
.
pre_merge
...
...
@@ -38,8 +38,8 @@ def test_remove_backend2():
LinkedServices
.
remove_unused_edges
()
# Final state assertions after linking and cleanup
assert
set
(
Frontend
.
dependencies
.
keys
())
==
{
"middle"
}
assert
Frontend
.
dependencies
[
"middle"
].
on
==
Middle
assert
set
(
Frontend
.
dependencies
.
keys
())
==
{
"middle"
}
assert
set
(
Middle
.
dependencies
.
keys
())
==
{
"backend"
}
assert
Middle
.
dependencies
[
"backend"
].
on
==
Backend
...
...
docs/backend.md
View file @
d675d221
...
...
@@ -30,7 +30,6 @@ from dynamo.sdk import dynamo_endpoint, service
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
},
)
...
...
@@ -108,7 +107,6 @@ class ResponseType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
}
)
...
...
@@ -218,7 +216,6 @@ class ResponseType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
}
)
...
...
@@ -312,7 +309,6 @@ in your class implementation:
```
python
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
},
)
...
...
@@ -340,7 +336,6 @@ your own custom metrics and use them in your cost function:
```
python
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
},
)
...
...
@@ -551,7 +546,6 @@ disaggregation, the DecodeWorker could just always do the Prefill step as well.
```
python
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
},
)
...
...
@@ -579,7 +573,6 @@ class DecodeWorker:
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"your_namespace"
,
},
)
...
...
examples/hello_world/disagg_skeleton/components/frontend.py
View file @
d675d221
...
...
@@ -22,8 +22,7 @@ from components.utils import GeneralRequest
from
fastapi
import
FastAPI
from
fastapi.responses
import
StreamingResponse
from
dynamo.sdk
import
depends
,
dynamo_endpoint
,
service
from
dynamo.sdk.lib.image
import
DYNAMO_IMAGE
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_api
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -31,7 +30,7 @@ app = FastAPI(title="Hello World LLM")
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo-demo"
},
dynamo
=
{
"namespace"
:
"dynamo-demo"
},
image
=
DYNAMO_IMAGE
,
app
=
app
,
)
...
...
@@ -46,7 +45,7 @@ class Frontend:
logger
.
debug
(
f
"Received signal
{
signum
}
, shutting down..."
)
sys
.
exit
(
0
)
@
dynamo_
endpoint
(
is_api
=
True
)
@
dynamo_
api
(
)
async
def
generate
(
self
,
prompt
,
request_id
):
# from request body keys
"""Stream results from the pipeline."""
logger
.
info
(
f
"Received:
{
prompt
=
}
,
{
request_id
=
}
"
)
...
...
examples/hello_world/disagg_skeleton/components/kv_router.py
View file @
d675d221
...
...
@@ -30,7 +30,6 @@ logger = logging.getLogger(__name__)
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo-demo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/hello_world/hello_world.py
View file @
d675d221
...
...
@@ -14,17 +14,18 @@
# limitations under the License.
import
logging
import
os
from
fastapi
import
FastAPI
from
fastapi.responses
import
StreamingResponse
from
pydantic
import
BaseModel
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_endpoint
,
service
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_api
,
dynamo_endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
"""
Pipeline Architecture:
...
...
@@ -57,7 +58,6 @@ class ResponseType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
,
},
image
=
DYNAMO_IMAGE
,
...
...
@@ -76,11 +76,11 @@ class Backend:
logger
.
info
(
f
"Backend received:
{
req_text
}
"
)
text
=
f
"
{
req_text
}
-
{
self
.
message
}
"
for
token
in
text
.
split
():
yield
f
"Backend:
{
token
}
"
yield
f
"
[process_id:
{
os
.
getpid
()
}
]
Backend:
{
token
}
"
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
},
dynamo
=
{
"namespace"
:
"inference"
},
image
=
DYNAMO_IMAGE
,
)
class
Middle
:
...
...
@@ -101,16 +101,12 @@ class Middle:
next_request
=
RequestType
(
text
=
text
).
model_dump_json
()
async
for
response
in
self
.
backend
.
generate
(
next_request
):
logger
.
info
(
f
"Middle received response:
{
response
}
"
)
yield
f
"Middle:
{
response
}
"
app
=
FastAPI
(
title
=
"Hello World!"
)
yield
f
"[process_id:
{
os
.
getpid
()
}
] Middle:
{
response
}
"
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"inference"
},
dynamo
=
{
"namespace"
:
"inference"
},
image
=
DYNAMO_IMAGE
,
app
=
app
,
)
class
Frontend
:
"""A simple frontend HTTP API that forwards requests to the dynamo graph."""
...
...
@@ -128,13 +124,14 @@ class Frontend:
logger
.
info
(
f
"Frontend config message:
{
self
.
message
}
"
)
logger
.
info
(
f
"Frontend config port:
{
self
.
port
}
"
)
@
dynamo_endpoint
(
is_api
=
True
)
# alternative syntax: @dynamo_endpoint(transports=[DynamoTransport.HTTP])
@
dynamo_api
()
async
def
generate
(
self
,
request
:
RequestType
):
"""Stream results from the pipeline."""
logger
.
info
(
f
"Frontend received:
{
request
.
text
}
"
)
async
def
content_generator
():
async
for
response
in
self
.
middle
.
generate
(
request
.
model_dump_json
()):
yield
f
"Frontend:
{
response
}
"
yield
f
"
[process_id:
{
os
.
getpid
()
}
]
Frontend:
{
response
}
"
return
StreamingResponse
(
content_generator
())
examples/llm/components/frontend.py
View file @
d675d221
...
...
@@ -20,7 +20,6 @@ from pathlib import Path
from
components.planner_service
import
Planner
from
components.processor
import
Processor
from
components.worker
import
VllmWorker
from
fastapi
import
FastAPI
from
pydantic
import
BaseModel
from
dynamo
import
sdk
...
...
@@ -52,13 +51,11 @@ class FrontendConfig(BaseModel):
# todo this should be called ApiServer
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
workers
=
1
,
image
=
DYNAMO_IMAGE
,
app
=
FastAPI
(
title
=
"LLM Example"
),
)
class
Frontend
:
planner
=
depends
(
Planner
)
...
...
@@ -71,7 +68,6 @@ class Frontend:
frontend_config
=
FrontendConfig
(
**
config
.
get
(
"Frontend"
,
{}))
self
.
frontend_config
=
frontend_config
self
.
process
=
None
self
.
setup_model
()
self
.
start_http_server
()
...
...
examples/llm/components/kv_router.py
View file @
d675d221
...
...
@@ -76,7 +76,6 @@ def parse_args(service_name, prefix) -> Namespace:
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/llm/components/planner_service.py
View file @
d675d221
...
...
@@ -33,7 +33,6 @@ class RequestType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
"component_type"
:
"planner"
,
},
...
...
examples/llm/components/prefill_worker.py
View file @
d675d221
...
...
@@ -41,7 +41,6 @@ class RequestType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/llm/components/processor.py
View file @
d675d221
...
...
@@ -45,7 +45,6 @@ class RequestType(Enum):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/llm/components/worker.py
View file @
d675d221
...
...
@@ -39,7 +39,6 @@ logger = logging.getLogger(__name__)
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/multimodal/components/encode_worker.py
View file @
d675d221
...
...
@@ -31,7 +31,6 @@ logger = logging.getLogger(__name__)
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/multimodal/components/frontend.py
View file @
d675d221
...
...
@@ -20,14 +20,13 @@ from fastapi import FastAPI
from
fastapi.responses
import
StreamingResponse
from
utils.protocol
import
MultiModalRequest
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_
api
,
service
logger
=
logging
.
getLogger
(
__name__
)
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
@@ -38,7 +37,7 @@ logger = logging.getLogger(__name__)
class
Frontend
:
processor
=
depends
(
Processor
)
@
dynamo_
endpoint
(
is_api
=
True
)
@
dynamo_
api
(
)
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
content_generator
():
async
for
response
in
self
.
processor
.
generate
(
request
.
model_dump_json
()):
...
...
examples/multimodal/components/prefill_worker.py
View file @
d675d221
...
...
@@ -45,7 +45,6 @@ class RequestType(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/multimodal/components/processor.py
View file @
d675d221
...
...
@@ -43,7 +43,6 @@ class RequestType(Enum):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/multimodal/components/worker.py
View file @
d675d221
...
...
@@ -48,7 +48,6 @@ logger = logging.getLogger(__name__)
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
resources
=
{
"gpu"
:
1
,
"cpu"
:
"10"
,
"memory"
:
"20Gi"
},
...
...
examples/sglang/components/frontend.py
View file @
d675d221
...
...
@@ -49,7 +49,6 @@ class FrontendConfig(BaseModel):
@
service
(
dynamo
=
{
"enabled"
:
True
,
"namespace"
:
"dynamo"
,
},
workers
=
1
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment