Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6d46288c
Unverified
Commit
6d46288c
authored
May 21, 2025
by
Biswa Panda
Committed by
GitHub
May 21, 2025
Browse files
feat: rename dynamo decorator (#1133)
parent
b520bf44
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
27 additions
and
27 deletions
+27
-27
examples/multimodal/components/encode_worker.py
examples/multimodal/components/encode_worker.py
+2
-2
examples/multimodal/components/frontend.py
examples/multimodal/components/frontend.py
+2
-2
examples/multimodal/components/prefill_worker.py
examples/multimodal/components/prefill_worker.py
+2
-2
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+2
-2
examples/multimodal/components/worker.py
examples/multimodal/components/worker.py
+2
-2
examples/sglang/components/decode_worker.py
examples/sglang/components/decode_worker.py
+2
-2
examples/sglang/components/worker.py
examples/sglang/components/worker.py
+2
-2
examples/tensorrt_llm/components/kv_router.py
examples/tensorrt_llm/components/kv_router.py
+2
-2
examples/tensorrt_llm/components/prefill_worker.py
examples/tensorrt_llm/components/prefill_worker.py
+2
-2
examples/tensorrt_llm/components/processor.py
examples/tensorrt_llm/components/processor.py
+3
-3
examples/tensorrt_llm/components/worker.py
examples/tensorrt_llm/components/worker.py
+2
-2
examples/vllm_v1/components/simple_load_balancer.py
examples/vllm_v1/components/simple_load_balancer.py
+2
-2
examples/vllm_v1/components/worker.py
examples/vllm_v1/components/worker.py
+2
-2
No files found.
examples/multimodal/components/encode_worker.py
View file @
6d46288c
...
...
@@ -24,7 +24,7 @@ from transformers import AutoImageProcessor, LlavaForConditionalGeneration
from
utils.protocol
import
EncodeRequest
,
EncodeResponse
from
utils.vllm
import
parse_vllm_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -50,7 +50,7 @@ class EncodeWorker:
self
.
MODEL_ID
,
device_map
=
"auto"
,
torch_dtype
=
torch
.
float16
).
eval
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
encode
(
self
,
request
:
EncodeRequest
)
->
AsyncIterator
[
EncodeResponse
]:
image
=
self
.
open_image
(
request
.
image_url
)
image_embeds
=
self
.
image_processor
(
images
=
image
,
return_tensors
=
"pt"
)
...
...
examples/multimodal/components/frontend.py
View file @
6d46288c
...
...
@@ -20,7 +20,7 @@ from fastapi import FastAPI
from
fastapi.responses
import
StreamingResponse
from
utils.protocol
import
MultiModalRequest
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_api
,
service
from
dynamo.sdk
import
DYNAMO_IMAGE
,
api
,
depends
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
class
Frontend
:
processor
=
depends
(
Processor
)
@
dynamo_
api
()
@
api
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
content_generator
():
async
for
response
in
self
.
processor
.
generate
(
request
.
model_dump_json
()):
...
...
examples/multimodal/components/prefill_worker.py
View file @
6d46288c
...
...
@@ -34,7 +34,7 @@ from vllm.entrypoints.openai.api_server import (
from
vllm.inputs.data
import
TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -223,6 +223,6 @@ class PrefillWorker:
):
yield
@
dynamo_
endpoint
()
@
endpoint
()
async
def
mock
(
self
,
req
:
RequestType
):
yield
f
"mock_response:
{
req
}
"
examples/multimodal/components/processor.py
View file @
6d46288c
...
...
@@ -31,7 +31,7 @@ from vllm.outputs import RequestOutput
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
dynamo.runtime
import
EtcdKvCache
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -195,7 +195,7 @@ class Processor(ProcessMixIn):
)
# The generate endpoint will be used by the frontend to handle incoming requests.
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
# TODO: Once the OpenAI-compatible frontend supports multimodal input, use it directly and remove this custom endpoint.
msg
=
{
...
...
examples/multimodal/components/worker.py
View file @
6d46288c
...
...
@@ -41,7 +41,7 @@ from vllm.inputs.data import TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.sampling_params
import
RequestOutputKind
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -175,7 +175,7 @@ class VllmWorker:
return
callback
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
):
image_features
=
None
if
self
.
do_remote_prefill
:
...
...
examples/sglang/components/decode_worker.py
View file @
6d46288c
...
...
@@ -21,7 +21,7 @@ import sglang as sgl
from
utils.protocol
import
DisaggPreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -42,7 +42,7 @@ class SGLangDecodeWorker:
logger
.
warning
(
"Decode worker initialized"
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
req
:
DisaggPreprocessedRequest
):
g
=
await
self
.
engine
.
async_generate
(
input_ids
=
req
.
request
.
token_ids
,
...
...
examples/sglang/components/worker.py
View file @
6d46288c
...
...
@@ -36,7 +36,7 @@ from utils.protocol import DisaggPreprocessedRequest, PreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -112,7 +112,7 @@ class SGLangWorker:
sampling_params
[
"ignore_eos"
]
=
request
.
stop_conditions
.
ignore_eos
return
sampling_params
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
# TODO: maintain a mapping from SGLang's Output struct to LLMEngineOutput
sampling_params
=
self
.
_build_sampling_params
(
request
)
...
...
examples/tensorrt_llm/components/kv_router.py
View file @
6d46288c
...
...
@@ -25,7 +25,7 @@ from common.protocol import Tokens
from
components.worker
import
TensorRTLLMWorker
from
dynamo.llm
import
AggregatedMetrics
,
KvIndexer
,
KvMetricsAggregator
,
OverlapScores
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -209,7 +209,7 @@ class Router:
return
best_worker_id
,
worker_scores
.
get
(
best_worker_id
,
0.0
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
Tokens
)
->
AsyncIterator
[
WorkerId
]:
if
self
.
indexer
is
None
or
self
.
metrics_aggregator
is
None
:
yield
"_0.0"
...
...
examples/tensorrt_llm/components/prefill_worker.py
View file @
6d46288c
...
...
@@ -20,7 +20,7 @@ from common.parser import parse_tensorrt_llm_args
from
common.protocol
import
TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -68,7 +68,7 @@ class TensorRTLLMPrefillWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
await
self
.
kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
yield
response
examples/tensorrt_llm/components/processor.py
View file @
6d46288c
...
...
@@ -27,7 +27,7 @@ from common.utils import RequestType
from
components.kv_router
import
Router
from
components.worker
import
TensorRTLLMWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -143,7 +143,7 @@ class Processor(ChatProcessorMixin):
logger
.
debug
(
f
"[preprocessor] Response:
{
response
}
"
)
yield
json
.
loads
(
response
)
@
dynamo_
endpoint
(
name
=
"chat/completions"
)
@
endpoint
(
name
=
"chat/completions"
)
async
def
generate_chat
(
self
,
raw_request
:
DynamoTRTLLMChatCompletionRequest
):
# max_tokens is deprecated; however, if max_tokens is provided instead
# of max_completion_tokens, we will use its value as max_completion_tokens.
...
...
@@ -172,7 +172,7 @@ class Processor(ChatProcessorMixin):
async
for
response
in
self
.
_generate
(
raw_request
,
RequestType
.
CHAT
):
yield
response
@
dynamo_
endpoint
(
name
=
"completions"
)
@
endpoint
(
name
=
"completions"
)
async
def
completions
(
self
,
raw_request
:
DynamoTRTLLMCompletionRequest
):
# min_tokens isn't currently propagated through the Rust OpenAI HTTP frontend,
# and ignore_eos is passed through the 'nvext' field, so set both when found.
...
...
examples/tensorrt_llm/components/worker.py
View file @
6d46288c
...
...
@@ -21,7 +21,7 @@ from common.protocol import TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
components.prefill_worker
import
TensorRTLLMPrefillWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -91,7 +91,7 @@ class TensorRTLLMWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
await
self
.
_kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
yield
response
examples/vllm_v1/components/simple_load_balancer.py
View file @
6d46288c
...
...
@@ -25,7 +25,7 @@ from vllm.inputs import TokensPrompt
from
vllm.sampling_params
import
SamplingParams
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -129,7 +129,7 @@ class SimpleLoadBalancer:
):
yield
MyRequestOutput
.
model_validate_json
(
decode_response
.
data
())
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
logger
.
debug
(
"Processor received completion request: %s"
,
request
.
model_dump_json
()
...
...
examples/vllm_v1/components/worker.py
View file @
6d46288c
...
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args
,
)
from
dynamo.sdk
import
async_on_start
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -65,7 +65,7 @@ class VllmBaseWorker:
finally
:
loop
.
stop
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMGenerateRequest
):
gen
=
self
.
engine_client
.
generate
(
prompt
=
request
.
prompt
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment