Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
6d46288c
"vscode:/vscode.git/clone" did not exist on "344c21dc0a0fd99a6653be51268bee5ff5db0d0d"
Unverified
Commit
6d46288c
authored
May 21, 2025
by
Biswa Panda
Committed by
GitHub
May 21, 2025
Browse files
feat: rename dynamo decorator (#1133)
parent
b520bf44
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
27 additions
and
27 deletions
+27
-27
examples/multimodal/components/encode_worker.py
examples/multimodal/components/encode_worker.py
+2
-2
examples/multimodal/components/frontend.py
examples/multimodal/components/frontend.py
+2
-2
examples/multimodal/components/prefill_worker.py
examples/multimodal/components/prefill_worker.py
+2
-2
examples/multimodal/components/processor.py
examples/multimodal/components/processor.py
+2
-2
examples/multimodal/components/worker.py
examples/multimodal/components/worker.py
+2
-2
examples/sglang/components/decode_worker.py
examples/sglang/components/decode_worker.py
+2
-2
examples/sglang/components/worker.py
examples/sglang/components/worker.py
+2
-2
examples/tensorrt_llm/components/kv_router.py
examples/tensorrt_llm/components/kv_router.py
+2
-2
examples/tensorrt_llm/components/prefill_worker.py
examples/tensorrt_llm/components/prefill_worker.py
+2
-2
examples/tensorrt_llm/components/processor.py
examples/tensorrt_llm/components/processor.py
+3
-3
examples/tensorrt_llm/components/worker.py
examples/tensorrt_llm/components/worker.py
+2
-2
examples/vllm_v1/components/simple_load_balancer.py
examples/vllm_v1/components/simple_load_balancer.py
+2
-2
examples/vllm_v1/components/worker.py
examples/vllm_v1/components/worker.py
+2
-2
No files found.
examples/multimodal/components/encode_worker.py
View file @
6d46288c
...
...
@@ -24,7 +24,7 @@ from transformers import AutoImageProcessor, LlavaForConditionalGeneration
from
utils.protocol
import
EncodeRequest
,
EncodeResponse
from
utils.vllm
import
parse_vllm_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -50,7 +50,7 @@ class EncodeWorker:
self
.
MODEL_ID
,
device_map
=
"auto"
,
torch_dtype
=
torch
.
float16
).
eval
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
encode
(
self
,
request
:
EncodeRequest
)
->
AsyncIterator
[
EncodeResponse
]:
image
=
self
.
open_image
(
request
.
image_url
)
image_embeds
=
self
.
image_processor
(
images
=
image
,
return_tensors
=
"pt"
)
...
...
examples/multimodal/components/frontend.py
View file @
6d46288c
...
...
@@ -20,7 +20,7 @@ from fastapi import FastAPI
from
fastapi.responses
import
StreamingResponse
from
utils.protocol
import
MultiModalRequest
from
dynamo.sdk
import
DYNAMO_IMAGE
,
depends
,
dynamo_api
,
service
from
dynamo.sdk
import
DYNAMO_IMAGE
,
api
,
depends
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
class
Frontend
:
processor
=
depends
(
Processor
)
@
dynamo_
api
()
@
api
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
async
def
content_generator
():
async
for
response
in
self
.
processor
.
generate
(
request
.
model_dump_json
()):
...
...
examples/multimodal/components/prefill_worker.py
View file @
6d46288c
...
...
@@ -34,7 +34,7 @@ from vllm.entrypoints.openai.api_server import (
from
vllm.inputs.data
import
TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -223,6 +223,6 @@ class PrefillWorker:
):
yield
@
dynamo_
endpoint
()
@
endpoint
()
async
def
mock
(
self
,
req
:
RequestType
):
yield
f
"mock_response:
{
req
}
"
examples/multimodal/components/processor.py
View file @
6d46288c
...
...
@@ -31,7 +31,7 @@ from vllm.outputs import RequestOutput
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
dynamo.runtime
import
EtcdKvCache
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -195,7 +195,7 @@ class Processor(ProcessMixIn):
)
# The generate endpoint will be used by the frontend to handle incoming requests.
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
MultiModalRequest
):
# TODO: After having the multimodal support in OpenAI compatible frontend, we can use that directly and remove the custom endpoint.
msg
=
{
...
...
examples/multimodal/components/worker.py
View file @
6d46288c
...
...
@@ -41,7 +41,7 @@ from vllm.inputs.data import TokensPrompt
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.sampling_params
import
RequestOutputKind
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -175,7 +175,7 @@ class VllmWorker:
return
callback
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMMultimodalRequest
):
image_features
=
None
if
self
.
do_remote_prefill
:
...
...
examples/sglang/components/decode_worker.py
View file @
6d46288c
...
...
@@ -21,7 +21,7 @@ import sglang as sgl
from
utils.protocol
import
DisaggPreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
dynamo.sdk
import
dynamo_
endpoint
,
service
from
dynamo.sdk
import
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -42,7 +42,7 @@ class SGLangDecodeWorker:
logger
.
warning
(
"Decode worker initialized"
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
req
:
DisaggPreprocessedRequest
):
g
=
await
self
.
engine
.
async_generate
(
input_ids
=
req
.
request
.
token_ids
,
...
...
examples/sglang/components/worker.py
View file @
6d46288c
...
...
@@ -36,7 +36,7 @@ from utils.protocol import DisaggPreprocessedRequest, PreprocessedRequest
from
utils.sglang
import
parse_sglang_args
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -112,7 +112,7 @@ class SGLangWorker:
sampling_params
[
"ignore_eos"
]
=
request
.
stop_conditions
.
ignore_eos
return
sampling_params
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
# TODO: maintain a mapping from SGLang's Ouput struct to LLMEngineOuput
sampling_params
=
self
.
_build_sampling_params
(
request
)
...
...
examples/tensorrt_llm/components/kv_router.py
View file @
6d46288c
...
...
@@ -25,7 +25,7 @@ from common.protocol import Tokens
from
components.worker
import
TensorRTLLMWorker
from
dynamo.llm
import
AggregatedMetrics
,
KvIndexer
,
KvMetricsAggregator
,
OverlapScores
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -209,7 +209,7 @@ class Router:
return
best_worker_id
,
worker_scores
.
get
(
best_worker_id
,
0.0
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
Tokens
)
->
AsyncIterator
[
WorkerId
]:
if
self
.
indexer
is
None
or
self
.
metrics_aggregator
is
None
:
yield
"_0.0"
...
...
examples/tensorrt_llm/components/prefill_worker.py
View file @
6d46288c
...
...
@@ -20,7 +20,7 @@ from common.parser import parse_tensorrt_llm_args
from
common.protocol
import
TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -68,7 +68,7 @@ class TensorRTLLMPrefillWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
await
self
.
kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
yield
response
examples/tensorrt_llm/components/processor.py
View file @
6d46288c
...
...
@@ -27,7 +27,7 @@ from common.utils import RequestType
from
components.kv_router
import
Router
from
components.worker
import
TensorRTLLMWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -143,7 +143,7 @@ class Processor(ChatProcessorMixin):
logger
.
debug
(
f
"[preprocessor] Response:
{
response
}
"
)
yield
json
.
loads
(
response
)
@
dynamo_
endpoint
(
name
=
"chat/completions"
)
@
endpoint
(
name
=
"chat/completions"
)
async
def
generate_chat
(
self
,
raw_request
:
DynamoTRTLLMChatCompletionRequest
):
# max_tokens is deprecated, however if the max_tokens is provided instead
# of max_completion_tokens, we will use the value as max_completion_tokens.
...
...
@@ -172,7 +172,7 @@ class Processor(ChatProcessorMixin):
async
for
response
in
self
.
_generate
(
raw_request
,
RequestType
.
CHAT
):
yield
response
@
dynamo_
endpoint
(
name
=
"completions"
)
@
endpoint
(
name
=
"completions"
)
async
def
completions
(
self
,
raw_request
:
DynamoTRTLLMCompletionRequest
):
# min_tokens isn't currently propagated through the Rust OpenAI HTTP frontend,
# and ignore_eos is passed through the 'nvext' field, so set both when found.
...
...
examples/tensorrt_llm/components/worker.py
View file @
6d46288c
...
...
@@ -21,7 +21,7 @@ from common.protocol import TRTLLMWorkerRequest
from
common.utils
import
ServerType
from
components.prefill_worker
import
TensorRTLLMPrefillWorker
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -91,7 +91,7 @@ class TensorRTLLMWorker(BaseTensorrtLLMEngine):
component
=
dynamo_context
[
"component"
]
await
self
.
_kv_metrics_publisher
.
create_endpoint
(
component
)
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
TRTLLMWorkerRequest
):
async
for
response
in
super
().
generate
(
request
):
yield
response
examples/vllm_v1/components/simple_load_balancer.py
View file @
6d46288c
...
...
@@ -25,7 +25,7 @@ from vllm.inputs import TokensPrompt
from
vllm.sampling_params
import
SamplingParams
from
dynamo.llm
import
ModelType
,
register_llm
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
depends
,
dynamo_context
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -129,7 +129,7 @@ class SimpleLoadBalancer:
):
yield
MyRequestOutput
.
model_validate_json
(
decode_response
.
data
())
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
PreprocessedRequest
):
logger
.
debug
(
"Processor received completion request: %s"
,
request
.
model_dump_json
()
...
...
examples/vllm_v1/components/worker.py
View file @
6d46288c
...
...
@@ -27,7 +27,7 @@ from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args
,
)
from
dynamo.sdk
import
async_on_start
,
dynamo_
endpoint
,
service
from
dynamo.sdk
import
async_on_start
,
endpoint
,
service
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -65,7 +65,7 @@ class VllmBaseWorker:
finally
:
loop
.
stop
()
@
dynamo_
endpoint
()
@
endpoint
()
async
def
generate
(
self
,
request
:
vLLMGenerateRequest
):
gen
=
self
.
engine_client
.
generate
(
prompt
=
request
.
prompt
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment