Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
cc40af70
Unverified
Commit
cc40af70
authored
May 28, 2025
by
Alec
Committed by
GitHub
May 28, 2025
Browse files
fix: dynamo-run pass proper args using register-llm (#1230)
parent
e450c2c7
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
4 deletions
+11
-4
launch/dynamo-run/src/subprocess/vllm_inc.py
launch/dynamo-run/src/subprocess/vllm_inc.py
+10
-3
lib/bindings/python/src/dynamo/_core.pyi
lib/bindings/python/src/dynamo/_core.pyi
+1
-1
No files found.
launch/dynamo-run/src/subprocess/vllm_inc.py
View file @
cc40af70
...
@@ -157,7 +157,7 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -157,7 +157,7 @@ async def init(runtime: DistributedRuntime, config: Config):
# KV routing relies on logging KV metrics
# KV routing relies on logging KV metrics
"disable_log_stats"
:
False
,
"disable_log_stats"
:
False
,
}
}
if
config
.
kv_block_size
:
assert
config
.
kv_block_size
>
0
,
"Must use non-negative integer for KV Block Size"
arg_map
[
"block_size"
]
=
config
.
kv_block_size
arg_map
[
"block_size"
]
=
config
.
kv_block_size
if
config
.
context_length
:
if
config
.
context_length
:
...
@@ -201,7 +201,14 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -201,7 +201,14 @@ async def init(runtime: DistributedRuntime, config: Config):
engine_client
=
await
engine_context
.
__aenter__
()
engine_client
=
await
engine_context
.
__aenter__
()
await
register_llm
(
await
register_llm
(
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
,
context_length
=
arg_map
.
get
(
"max_model_len"
,
None
),
# if None, takes length from tokenizer
kv_cache_block_size
=
arg_map
[
"block_size"
],
)
)
handler
=
RequestHandler
(
component
,
engine_client
,
default_sampling_params
)
handler
=
RequestHandler
(
component
,
engine_client
,
default_sampling_params
)
handler
.
setup_kv_metrics
()
handler
.
setup_kv_metrics
()
...
...
lib/bindings/python/src/dynamo/_core.pyi
View file @
cc40af70
...
@@ -603,7 +603,7 @@ class ModelType:
...
@@ -603,7 +603,7 @@ class ModelType:
"""What type of request this model needs: Chat, Component or Backend (pre-processed)"""
"""What type of request this model needs: Chat, Component or Backend (pre-processed)"""
...
...
async def register_llm(model_type: ModelType, endpoint: Endpoint, model_path: str, model_name: Optional[str]) -> None:
async def register_llm(model_type: ModelType, endpoint: Endpoint, model_path: str, model_name: Optional[str]
= None, context_length: Optional[int] = None, kv_cache_block_size: Optional[int] = None
) -> None:
"""Attach the model at path to the given endpoint, and advertise it as model_type"""
"""Attach the model at path to the given endpoint, and advertise it as model_type"""
...
...
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment