Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6d46288c
Unverified
Commit
6d46288c
authored
May 21, 2025
by
Biswa Panda
Committed by
GitHub
May 21, 2025
Browse files
feat: rename dynamo decorator (#1133)
parent
b520bf44
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
27 additions
and
27 deletions
+27
-27
examples/multimodal/components/encode_worker.py
examples/multimodal/components/encode_worker.py
+2
-2
examples/multimodal/components/frontend.py
examples/multimodal/components/frontend.py
+2
-2
examples/multimodal/components/prefill_worker.py
examples/multimodal/components/prefill_worker.py
+2
-2
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+2
-2
examples/multimodal/components/worker.py
examples/multimodal/components/worker.py
+2
-2
examples/sglang/components/decode_worker.py
examples/sglang/components/decode_worker.py
+2
-2
examples/sglang/components/worker.py
examples/sglang/components/worker.py
+2
-2
examples/tensorrt_llm/components/kv_router.py
examples/tensorrt_llm/components/kv_router.py
+2
-2
examples/tensorrt_llm/components/prefill_worker.py
examples/tensorrt_llm/components/prefill_worker.py
+2
-2
examples/tensorrt_llm/components/processor.py
examples/tensorrt_llm/components/processor.py
+3
-3
examples/tensorrt_llm/components/worker.py
examples/tensorrt_llm/components/worker.py
+2
-2
examples/vllm_v1/components/simple_load_balancer.py
examples/vllm_v1/components/simple_load_balancer.py
+2
-2
examples/vllm_v1/components/worker.py
examples/vllm_v1/components/worker.py
+2
-2
No files found.
examples/multimodal/components/encode_worker.py
View file @
6d46288c
...
@@ -24,7 +24,7 @@ from transformers import AutoImageProcessor, LlavaForConditionalGeneration
...
@@ -24,7 +24,7 @@ from transformers import AutoImageProcessor, LlavaForConditionalGeneration
from
utils.protocol
import
EncodeRequest
,
EncodeResponse
from
utils.protocol
import
EncodeRequest
,
EncodeResponse
from
utils.vllm
import
parse_vllm_args
from
utils.vllm
import
parse_vllm_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -50,7 +50,7 @@ class EncodeWorker:
...
@@ -50,7 +50,7 @@ class EncodeWorker:
self
.
MODEL_ID
,
device_map
=
"auto"
,
torch_dtype
=
torch
.
float16
self
.
MODEL_ID
,
device_map
=
"auto"
,
torch_dtype
=
torch
.
float16
).
eval
()
).
eval
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
encode
(
self
,
request
:
EncodeRequest
)
->
AsyncIterator
[
EncodeResponse
]:
async
def
encode
(
self
,
request
:
EncodeRequest
)
->
AsyncIterator
[
EncodeResponse
]:
image
=
self
.
open_image
(
request
.
image_url
)
image
=
self
.
open_image
(
request
.
image_url
)
image_embeds
=
self
.
image_processor
(
images
=
image
,
return_tensors
=
"pt"
)
image_embeds
=
self
.
image_processor
(
images
=
image
,
return_tensors
=
"pt"
)
...
...
examples/multimodal/components/frontend.py
View file @
6d46288c
...
@@ -20,7 +20,7 @@ from fastapi import FastAPI
...
@@ -20,7 +20,7 @@ from fastapi import FastAPI
from
fastapi.responses
import
StreamingResponse
from
fastapi.responses
import
StreamingResponse
from
utils.protocol
import
MultiModalRequest
from
utils.protocol
import
MultiModalRequest
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_api
,
service
from
dynamo.sdk
import
DYNAMO_IMAGE
,
api
,
depends
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
...
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
class
Frontend
:
class
Frontend
:
processor
=
depends
(
Processor
)
processor
=
depends
(
Processor
)
@
dynamo_
api
()
@
api
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
content_generator
():
async
def
content_generator
():
async
for
response
in
self
.
processor
.
generate
(
request
.
model_dump_json
()):
async
for
response
in
self
.
processor
.
generate
(
request
.
model_dump_json
()):
...
...
examples/multimodal/components/prefill_worker.py
View file @
6d46288c
...
@@ -34,7 +34,7 @@ from vllm.entrypoints.openai.api_server import (
...
@@ -34,7 +34,7 @@ from vllm.entrypoints.openai.api_server import (
from
vllm.inputs.data
import
TokensPrompt
from
vllm.inputs.data
import
TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -223,6 +223,6 @@ class PrefillWorker:
...
@@ -223,6 +223,6 @@ class PrefillWorker:
):
):
yield
yield
@
dynamo_
endpoint
()
@
endpoint
()
async
def
mock
(
self
,
req
:
RequestType
):
async
def
mock
(
self
,
req
:
RequestType
):
yield
f
"mock_response:
{
req
}
"
yield
f
"mock_response:
{
req
}
"
examples/multimodal/components/processor.py
View file @
6d46288c
...
@@ -31,7 +31,7 @@ from vllm.outputs import RequestOutput
...
@@ -31,7 +31,7 @@ from vllm.outputs import RequestOutput
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
dynamo.runtime
import
EtcdKvCache
from
dynamo.runtime
import
EtcdKvCache
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -195,7 +195,7 @@ class Processor(ProcessMixIn):
...
@@ -195,7 +195,7 @@ class Processor(ProcessMixIn):
)
)
# The generate endpoint will be used by the frontend to handle incoming requests.
# The generate endpoint will be used by the frontend to handle incoming requests.
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
generate
(
self
,
request
:
MultiModalRequest
):
# TODO: After having the multimodal support in OpenAI compatible frontend, we can use that directly and remove the custom endpoint.
# TODO: After having the multimodal support in OpenAI compatible frontend, we can use that directly and remove the custom endpoint.
msg
=
{
msg
=
{
...
...
examples/multimodal/components/worker.py
View file @
6d46288c
...
@@ -41,7 +41,7 @@ from vllm.inputs.data import TokensPrompt
...
@@ -41,7 +41,7 @@ from vllm.inputs.data import TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.sampling_params
import
RequestOutputKind
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -175,7 +175,7 @@ class VllmWorker:
...
@@ -175,7 +175,7 @@ class VllmWorker:
return
callback
return
callback
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
):
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
):
image_features
=
None
image_features
=
None
if
self
.
do_remote_prefill
:
if
self
.
do_remote_prefill
:
...
...
examples/sglang/components/decode_worker.py
View file @
6d46288c
...
@@ -21,7 +21,7 @@ import sglang as sgl
...
@@ -21,7 +21,7 @@ import sglang as sgl
from
utils.protocol
import
DisaggPreprocessedRequest
from
utils.protocol
import
DisaggPreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
utils.sglang
import
parse_sglang_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -42,7 +42,7 @@ class SGLangDecodeWorker:
...
@@ -42,7 +42,7 @@ class SGLangDecodeWorker:
logger
.
warning
(
"Decode worker initialized"
)
logger
.
warning
(
"Decode worker initialized"
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
req
:
DisaggPreprocessedRequest
):
async
def
generate
(
self
,
req
:
DisaggPreprocessedRequest
):
g
=
await
self
.
engine
.
async_generate
(
g
=
await
self
.
engine
.
async_generate
(
input_ids
=
req
.
request
.
token_ids
,
input_ids
=
req
.
request
.
token_ids
,
...
...
examples/sglang/components/worker.py
View file @
6d46288c
...
@@ -36,7 +36,7 @@ from utils.protocol import DisaggPreprocessedRequest, PreprocessedRequest
...
@@ -36,7 +36,7 @@ from utils.protocol import DisaggPreprocessedRequest, PreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
utils.sglang
import
parse_sglang_args
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -112,7 +112,7 @@ class SGLangWorker:
...
@@ -112,7 +112,7 @@ class SGLangWorker:
sampling_params
[
"ignore_eos"
]
=
request
.
stop_conditions
.
ignore_eos
sampling_params
[
"ignore_eos"
]
=
request
.
stop_conditions
.
ignore_eos
return
sampling_params
return
sampling_params
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
# TODO: maintain a mapping from SGLang's Ouput struct to LLMEngineOuput
# TODO: maintain a mapping from SGLang's Ouput struct to LLMEngineOuput
sampling_params
=
self
.
_build_sampling_params
(
request
)
sampling_params
=
self
.
_build_sampling_params
(
request
)
...
...
examples/tensorrt_llm/components/kv_router.py
View file @
6d46288c
...
@@ -25,7 +25,7 @@ from common.protocol import Tokens
...
@@ -25,7 +25,7 @@ from common.protocol import Tokens
from
components.worker
import
TensorRTLLMWorker
from
components.worker
import
TensorRTLLMWorker
from
dynamo.llm
import
AggregatedMetrics
,
KvIndexer
,
KvMetricsAggregator
,
OverlapScores
from
dynamo.llm
import
AggregatedMetrics
,
KvIndexer
,
KvMetricsAggregator
,
OverlapScores
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -209,7 +209,7 @@ class Router:
...
@@ -209,7 +209,7 @@ class Router:
return
best_worker_id
,
worker_scores
.
get
(
best_worker_id
,
0.0
)
return
best_worker_id
,
worker_scores
.
get
(
best_worker_id
,
0.0
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
Tokens
)
->
AsyncIterator
[
WorkerId
]:
async
def
generate
(
self
,
request
:
Tokens
)
->
AsyncIterator
[
WorkerId
]:
if
self
.
indexer
is
None
or
self
.
metrics_aggregator
is
None
:
if
self
.
indexer
is
None
or
self
.
metrics_aggregator
is
None
:
yield
"_0.0"
yield
"_0.0"
...
...
examples/tensorrt_llm/components/prefill_worker.py
View file @
6d46288c
...
@@ -20,7 +20,7 @@ from common.parser import parse_tensorrt_llm_args
...
@@ -20,7 +20,7 @@ from common.parser import parse_tensorrt_llm_args
from
common.protocol
import
TRTLLMWorkerRequest
from
common.protocol
import
TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
common.utils
import
ServerType
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -68,7 +68,7 @@ class TensorRTLLMPrefillWorker(BaseTensorrtLLMEngine):
...
@@ -68,7 +68,7 @@ class TensorRTLLMPrefillWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
component
=
dynamo_context
[
"component"
]
await
self
.
kv_metrics_publisher
.
create_endpoint
(
component
)
await
self
.
kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
async
for
response
in
super
().
generate
(
request
):
yield
response
yield
response
examples/tensorrt_llm/components/processor.py
View file @
6d46288c
...
@@ -27,7 +27,7 @@ from common.utils import RequestType
...
@@ -27,7 +27,7 @@ from common.utils import RequestType
from
components.kv_router
import
Router
from
components.kv_router
import
Router
from
components.worker
import
TensorRTLLMWorker
from
components.worker
import
TensorRTLLMWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -143,7 +143,7 @@ class Processor(ChatProcessorMixin):
...
@@ -143,7 +143,7 @@ class Processor(ChatProcessorMixin):
logger
.
debug
(
f
"[preprocessor] Response:
{
response
}
"
)
logger
.
debug
(
f
"[preprocessor] Response:
{
response
}
"
)
yield
json
.
loads
(
response
)
yield
json
.
loads
(
response
)
@
dynamo_
endpoint
(
name
=
"chat/completions"
)
@
endpoint
(
name
=
"chat/completions"
)
async
def
generate_chat
(
self
,
raw_request
:
DynamoTRTLLMChatCompletionRequest
):
async
def
generate_chat
(
self
,
raw_request
:
DynamoTRTLLMChatCompletionRequest
):
# max_tokens is deprecated, however if the max_tokens is provided instead
# max_tokens is deprecated, however if the max_tokens is provided instead
# of max_completion_tokens, we will use the value as max_completion_tokens.
# of max_completion_tokens, we will use the value as max_completion_tokens.
...
@@ -172,7 +172,7 @@ class Processor(ChatProcessorMixin):
...
@@ -172,7 +172,7 @@ class Processor(ChatProcessorMixin):
async
for
response
in
self
.
_generate
(
raw_request
,
RequestType
.
CHAT
):
async
for
response
in
self
.
_generate
(
raw_request
,
RequestType
.
CHAT
):
yield
response
yield
response
@
dynamo_
endpoint
(
name
=
"completions"
)
@
endpoint
(
name
=
"completions"
)
async
def
completions
(
self
,
raw_request
:
DynamoTRTLLMCompletionRequest
):
async
def
completions
(
self
,
raw_request
:
DynamoTRTLLMCompletionRequest
):
# min_tokens isn't currently propagated through the Rust OpenAI HTTP frontend,
# min_tokens isn't currently propagated through the Rust OpenAI HTTP frontend,
# and ignore_eos is passed through the 'nvext' field, so set both when found.
# and ignore_eos is passed through the 'nvext' field, so set both when found.
...
...
examples/tensorrt_llm/components/worker.py
View file @
6d46288c
...
@@ -21,7 +21,7 @@ from common.protocol import TRTLLMWorkerRequest
...
@@ -21,7 +21,7 @@ from common.protocol import TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
common.utils
import
ServerType
from
components.prefill_worker
import
TensorRTLLMPrefillWorker
from
components.prefill_worker
import
TensorRTLLMPrefillWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -91,7 +91,7 @@ class TensorRTLLMWorker(BaseTensorrtLLMEngine):
...
@@ -91,7 +91,7 @@ class TensorRTLLMWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
component
=
dynamo_context
[
"component"
]
await
self
.
_kv_metrics_publisher
.
create_endpoint
(
component
)
await
self
.
_kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
async
for
response
in
super
().
generate
(
request
):
yield
response
yield
response
examples/vllm_v1/components/simple_load_balancer.py
View file @
6d46288c
...
@@ -25,7 +25,7 @@ from vllm.inputs import TokensPrompt
...
@@ -25,7 +25,7 @@ from vllm.inputs import TokensPrompt
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -129,7 +129,7 @@ class SimpleLoadBalancer:
...
@@ -129,7 +129,7 @@ class SimpleLoadBalancer:
):
):
yield
MyRequestOutput
.
model_validate_json
(
decode_response
.
data
())
yield
MyRequestOutput
.
model_validate_json
(
decode_response
.
data
())
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
logger
.
debug
(
logger
.
debug
(
"Processor received completion request: %s"
,
request
.
model_dump_json
()
"Processor received completion request: %s"
,
request
.
model_dump_json
()
...
...
examples/vllm_v1/components/worker.py
View file @
6d46288c
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args
,
build_async_engine_client_from_engine_args
,
)
)
from
dynamo.sdk
import
async_on_start
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -65,7 +65,7 @@ class VllmBaseWorker:
...
@@ -65,7 +65,7 @@ class VllmBaseWorker:
finally
:
finally
:
loop
.
stop
()
loop
.
stop
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMGenerateRequest
):
async
def
generate
(
self
,
request
:
vLLMGenerateRequest
):
gen
=
self
.
engine_client
.
generate
(
gen
=
self
.
engine_client
.
generate
(
prompt
=
request
.
prompt
,
prompt
=
request
.
prompt
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment