Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
08c01d8c
"vscode:/vscode.git/clone" did not exist on "537c9ee5bb7aca2088c0bff8495bc6a69e6c5ea0"
Unverified
Commit
08c01d8c
authored
May 21, 2025
by
Neelay Shah
Committed by
GitHub
May 21, 2025
Browse files
fix: register model after engine load (#1145)
parent
8d636ebd
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
22 deletions
+33
-22
launch/dynamo-run/src/subprocess/sglang_inc.py
launch/dynamo-run/src/subprocess/sglang_inc.py
+8
-7
launch/dynamo-run/src/subprocess/vllm_inc.py
launch/dynamo-run/src/subprocess/vllm_inc.py
+25
-15
No files found.
launch/dynamo-run/src/subprocess/sglang_inc.py
View file @
08c01d8c
...
@@ -84,13 +84,6 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -84,13 +84,6 @@ async def init(runtime: DistributedRuntime, config: Config):
"""
"""
Instantiate and serve
Instantiate and serve
"""
"""
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
await
component
.
create_service
()
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_llm
(
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
)
arg_map
=
{
arg_map
=
{
"model_path"
:
config
.
model_path
,
"model_path"
:
config
.
model_path
,
...
@@ -124,6 +117,14 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -124,6 +117,14 @@ async def init(runtime: DistributedRuntime, config: Config):
engine_args
=
ServerArgs
(
**
arg_map
)
engine_args
=
ServerArgs
(
**
arg_map
)
engine_client
=
sglang
.
Engine
(
server_args
=
engine_args
)
engine_client
=
sglang
.
Engine
(
server_args
=
engine_args
)
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
await
component
.
create_service
()
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_llm
(
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
)
# the server will gracefully shutdown (i.e., keep opened TCP streams finishes)
# the server will gracefully shutdown (i.e., keep opened TCP streams finishes)
# after the lease is revoked
# after the lease is revoked
await
endpoint
.
serve_endpoint
(
RequestHandler
(
engine_client
).
generate
)
await
endpoint
.
serve_endpoint
(
RequestHandler
(
engine_client
).
generate
)
...
...
launch/dynamo-run/src/subprocess/vllm_inc.py
View file @
08c01d8c
...
@@ -133,17 +133,18 @@ async def worker(runtime: DistributedRuntime):
...
@@ -133,17 +133,18 @@ async def worker(runtime: DistributedRuntime):
await
init
(
runtime
,
cmd_line_args
())
await
init
(
runtime
,
cmd_line_args
())
def
_check_and_set_env_value
(
key
,
expected
,
allow_override
=
False
):
if
not
allow_override
and
key
in
os
.
environ
and
os
.
environ
[
key
]
!=
expected
:
raise
ValueError
(
f
"
{
key
}
is set and doesn't equal expected
{
expected
}
. Please unset variable before launch."
)
os
.
environ
.
setdefault
(
key
,
expected
)
async
def
init
(
runtime
:
DistributedRuntime
,
config
:
Config
):
async
def
init
(
runtime
:
DistributedRuntime
,
config
:
Config
):
"""
"""
Instantiate and serve
Instantiate and serve
"""
"""
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
await
component
.
create_service
()
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
await
register_llm
(
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
)
arg_map
=
{
arg_map
=
{
"model"
:
config
.
model_path
,
"model"
:
config
.
model_path
,
...
@@ -170,14 +171,20 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -170,14 +171,20 @@ async def init(runtime: DistributedRuntime, config: Config):
arg_map
=
{
**
arg_map
,
**
json_map
}
# json_map gets precedence
arg_map
=
{
**
arg_map
,
**
json_map
}
# json_map gets precedence
# Patch won't start KVCacheEventManager unless these four are set
# Patch won't start KVCacheEventManager unless these four are set
os
.
environ
[
"VLLM_WORKER_ID"
]
=
str
(
endpoint
.
lease_id
())
os
.
environ
[
component
=
runtime
.
namespace
(
config
.
namespace
).
component
(
config
.
component
)
"VLLM_KV_CAPI_PATH"
await
component
.
create_service
()
]
=
"libdynamo_llm_capi.so"
# Must be on LD_LIBRARY_PATH
endpoint
=
component
.
endpoint
(
config
.
endpoint
)
os
.
environ
[
"VLLM_KV_NAMESPACE"
]
=
config
.
namespace
os
.
environ
[
"VLLM_KV_COMPONENT"
]
=
config
.
component
_check_and_set_env_value
(
"VLLM_WORKER_ID"
,
str
(
endpoint
.
lease_id
()))
_check_and_set_env_value
(
os
.
environ
[
"VLLM_NO_USAGE_STATS"
]
=
"1"
# Avoid internal HTTP requests
"VLLM_KV_CAPI_PATH"
,
"libdynamo_llm_capi.so"
,
allow_override
=
True
)
_check_and_set_env_value
(
"VLLM_KV_NAMESPACE"
,
config
.
namespace
)
_check_and_set_env_value
(
"VLLM_KV_COMPONENT"
,
config
.
component
)
_check_and_set_env_value
(
"VLLM_NO_USAGE_STATS"
,
"1"
,
allow_override
=
True
)
# Avoid internal HTTP requests
engine_args
=
AsyncEngineArgs
(
**
arg_map
)
engine_args
=
AsyncEngineArgs
(
**
arg_map
)
model_config
=
engine_args
.
create_model_config
()
model_config
=
engine_args
.
create_model_config
()
# Load default sampling params from `generation_config.json`
# Load default sampling params from `generation_config.json`
...
@@ -186,6 +193,9 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -186,6 +193,9 @@ async def init(runtime: DistributedRuntime, config: Config):
engine_context
=
build_async_engine_client_from_engine_args
(
engine_args
)
engine_context
=
build_async_engine_client_from_engine_args
(
engine_args
)
engine_client
=
await
engine_context
.
__aenter__
()
engine_client
=
await
engine_context
.
__aenter__
()
await
register_llm
(
ModelType
.
Backend
,
endpoint
,
config
.
model_path
,
config
.
model_name
)
handler
=
RequestHandler
(
component
,
engine_client
,
default_sampling_params
)
handler
=
RequestHandler
(
component
,
engine_client
,
default_sampling_params
)
handler
.
setup_kv_metrics
()
handler
.
setup_kv_metrics
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment