Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
8c2072cf
Unverified
Commit
8c2072cf
authored
Aug 28, 2025
by
Alec
Committed by
GitHub
Aug 28, 2025
Browse files
fix: [trtllm] add wait_for_instance before register_llm (#2683)
Signed-off-by:
alec-flowers
<
aflowers@nvidia.com
>
parent
63f5bbc0
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
91 additions
and
45 deletions
+91
-45
components/backends/trtllm/src/dynamo/trtllm/main.py
components/backends/trtllm/src/dynamo/trtllm/main.py
+17
-11
tests/serve/common.py
tests/serve/common.py
+1
-1
tests/serve/test_trtllm.py
tests/serve/test_trtllm.py
+4
-14
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+1
-10
tests/utils/managed_process.py
tests/utils/managed_process.py
+68
-9
No files found.
components/backends/trtllm/src/dynamo/trtllm/main.py
View file @
8c2072cf
...
...
@@ -239,17 +239,6 @@ async def init(runtime: DistributedRuntime, config: Config):
runtime_config
.
reasoning_parser
=
config
.
reasoning_parser
runtime_config
.
tool_call_parser
=
config
.
tool_call_parser
if
is_first_worker
(
config
):
# Register the model with runtime config
await
register_llm
(
modelType
,
endpoint
,
config
.
model_path
,
config
.
served_model_name
,
kv_cache_block_size
=
config
.
kv_block_size
,
migration_limit
=
config
.
migration_limit
,
runtime_config
=
runtime_config
,
)
# publisher will be set later if publishing is enabled.
handler_config
=
RequestHandlerConfig
(
component
=
component
,
...
...
@@ -262,6 +251,23 @@ async def init(runtime: DistributedRuntime, config: Config):
multimodal_processor
=
multimodal_processor
,
)
if
next_client
:
logging
.
info
(
f
"Waiting for the next endpoint to be ready:
{
config
.
next_endpoint
}
"
)
await
next_client
.
wait_for_instances
()
if
is_first_worker
(
config
):
# Register the model with runtime config
await
register_llm
(
modelType
,
endpoint
,
config
.
model_path
,
config
.
served_model_name
,
kv_cache_block_size
=
config
.
kv_block_size
,
migration_limit
=
config
.
migration_limit
,
)
if
config
.
publish_events_and_metrics
and
is_first_worker
(
config
):
# Initialize and pass in the publisher to the request handler to
# publish events and metrics.
...
...
tests/serve/common.py
View file @
8c2072cf
...
...
@@ -23,7 +23,7 @@ class EngineConfig:
endpoints
:
List
[
str
]
response_handlers
:
List
[
Callable
[[
Any
],
str
]]
model
:
str
timeout
:
int
=
12
0
timeout
:
int
=
60
0
delayed_start
:
int
=
0
...
...
tests/serve/test_trtllm.py
View file @
8c2072cf
...
...
@@ -22,8 +22,6 @@ logger = logging.getLogger(__name__)
class
TRTLLMConfig
(
EngineConfig
):
"""Configuration for trtllm test scenarios"""
timeout
:
int
=
60
class
TRTLLMProcess
(
EngineProcess
):
"""Simple process manager for trtllm shell scripts"""
...
...
@@ -71,9 +69,7 @@ trtllm_configs = {
chat_completions_response_handler
,
completions_response_handler
,
],
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
delayed_start
=
0
,
timeout
=
360
,
model
=
"Qwen/Qwen3-0.6B"
,
),
"disaggregated"
:
TRTLLMConfig
(
name
=
"disaggregated"
,
...
...
@@ -85,9 +81,7 @@ trtllm_configs = {
chat_completions_response_handler
,
completions_response_handler
,
],
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
delayed_start
=
0
,
timeout
=
360
,
model
=
"Qwen/Qwen3-0.6B"
,
),
# TODO: These are sanity tests that the kv router examples launch
# and inference without error, but do not do detailed checks on the
...
...
@@ -102,9 +96,7 @@ trtllm_configs = {
chat_completions_response_handler
,
completions_response_handler
,
],
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
delayed_start
=
0
,
timeout
=
360
,
model
=
"Qwen/Qwen3-0.6B"
,
),
"disaggregated_router"
:
TRTLLMConfig
(
name
=
"disaggregated_router"
,
...
...
@@ -116,9 +108,7 @@ trtllm_configs = {
chat_completions_response_handler
,
completions_response_handler
,
],
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
delayed_start
=
0
,
timeout
=
360
,
model
=
"Qwen/Qwen3-0.6B"
,
),
}
...
...
tests/serve/test_vllm.py
View file @
8c2072cf
...
...
@@ -133,8 +133,6 @@ vllm_configs = {
completions_response_handler
,
],
model
=
"Qwen/Qwen3-0.6B"
,
delayed_start
=
0
,
timeout
=
360
,
),
"agg-router"
:
VLLMConfig
(
name
=
"agg-router"
,
...
...
@@ -147,8 +145,6 @@ vllm_configs = {
completions_response_handler
,
],
model
=
"Qwen/Qwen3-0.6B"
,
delayed_start
=
0
,
timeout
=
360
,
),
"disaggregated"
:
VLLMConfig
(
name
=
"disaggregated"
,
...
...
@@ -161,8 +157,6 @@ vllm_configs = {
completions_response_handler
,
],
model
=
"Qwen/Qwen3-0.6B"
,
delayed_start
=
0
,
timeout
=
360
,
),
"deepep"
:
VLLMConfig
(
name
=
"deepep"
,
...
...
@@ -179,7 +173,6 @@ vllm_configs = {
completions_response_handler
,
],
model
=
"deepseek-ai/DeepSeek-V2-Lite"
,
delayed_start
=
0
,
args
=
[
"--model"
,
"deepseek-ai/DeepSeek-V2-Lite"
,
...
...
@@ -190,7 +183,7 @@ vllm_configs = {
"--gpus-per-node"
,
"2"
,
],
timeout
=
56
0
,
timeout
=
70
0
,
),
"multimodal_agg_llava"
:
VLLMConfig
(
name
=
"multimodal_agg_llava"
,
...
...
@@ -202,9 +195,7 @@ vllm_configs = {
chat_completions_response_handler
,
],
model
=
"llava-hf/llava-1.5-7b-hf"
,
delayed_start
=
0
,
args
=
[
"--model"
,
"llava-hf/llava-1.5-7b-hf"
],
timeout
=
360
,
),
"multimodal_agg_qwen"
:
VLLMConfig
(
name
=
"multimodal_agg_qwen"
,
...
...
tests/utils/managed_process.py
View file @
8c2072cf
...
...
@@ -17,6 +17,7 @@ import json
import
logging
import
os
import
shutil
import
signal
import
socket
import
subprocess
import
time
...
...
@@ -82,6 +83,10 @@ class ManagedProcess:
straggler_commands
:
List
[
str
]
=
field
(
default_factory
=
list
)
log_dir
:
str
=
os
.
getcwd
()
# Ensure attributes exist even if startup fails early
proc
:
Optional
[
subprocess
.
Popen
]
=
None
_pgid
:
Optional
[
int
]
=
None
_logger
=
logging
.
getLogger
()
_command_name
=
None
_log_path
=
None
...
...
@@ -107,20 +112,30 @@ class ManagedProcess:
return
self
except
Exception
as
e
:
self
.
__exit__
(
None
,
None
,
None
)
raise
e
except
Exception
:
try
:
self
.
__exit__
(
None
,
None
,
None
)
except
Exception
as
cleanup_err
:
self
.
_logger
.
warning
(
"Error during cleanup in __enter__: %s"
,
cleanup_err
)
raise
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
self
.
_terminate_process_group
()
process_list
=
[
self
.
proc
,
self
.
_tee_proc
,
self
.
_sed_proc
]
for
process
in
process_list
:
if
process
:
if
process
.
stdout
:
process
.
stdout
.
close
()
if
process
.
stdin
:
process
.
stdin
.
close
()
terminate_process_tree
(
process
.
pid
,
self
.
_logger
)
process
.
wait
()
try
:
if
process
.
stdout
:
process
.
stdout
.
close
()
if
process
.
stdin
:
process
.
stdin
.
close
()
terminate_process_tree
(
process
.
pid
,
self
.
_logger
)
process
.
wait
()
except
Exception
as
e
:
self
.
_logger
.
warning
(
"Error terminating process: %s"
,
e
)
if
self
.
data_dir
:
self
.
_remove_directory
(
self
.
data_dir
)
...
...
@@ -169,6 +184,12 @@ class ManagedProcess:
stderr
=
stderr
,
start_new_session
=
True
,
# Isolate process group to prevent kill 0 from affecting parent
)
# Capture the child's process group id for robust cleanup even if parent shell exits
try
:
self
.
_pgid
=
os
.
getpgid
(
self
.
proc
.
pid
)
except
Exception
as
e
:
self
.
_logger
.
warning
(
"Could not get process group id: %s"
,
e
)
self
.
_pgid
=
None
self
.
_sed_proc
=
subprocess
.
Popen
(
[
"sed"
,
"-u"
,
f
"s/^/[
{
self
.
_command_name
.
upper
()
}
] /"
],
stdin
=
self
.
proc
.
stdout
,
...
...
@@ -190,6 +211,12 @@ class ManagedProcess:
stderr
=
stderr
,
start_new_session
=
True
,
# Isolate process group to prevent kill 0 from affecting parent
)
# Capture the child's process group id for robust cleanup even if parent shell exits
try
:
self
.
_pgid
=
os
.
getpgid
(
self
.
proc
.
pid
)
except
Exception
as
e
:
self
.
_logger
.
warning
(
"Could not get process group id: %s"
,
e
)
self
.
_pgid
=
None
self
.
_sed_proc
=
subprocess
.
Popen
(
[
"sed"
,
"-u"
,
f
"s/^/[
{
self
.
_command_name
.
upper
()
}
] /"
],
...
...
@@ -198,6 +225,38 @@ class ManagedProcess:
)
self
.
_tee_proc
=
None
def _terminate_process_group(self, timeout: float = 5.0):
    """Terminate the entire process group/session started for the child.

    This catches cases where the launcher shell exits and its children are
    reparented, leaving no parent PID to traverse, but they remain in the
    same process group.

    SIGTERM is sent to the group first. The group is then polled for up to
    ``timeout`` seconds and the method returns as soon as no member is
    left, instead of always sleeping for the whole timeout. Anything still
    alive at the deadline receives SIGKILL.

    Args:
        timeout: Maximum number of seconds to wait between SIGTERM and
            SIGKILL.

    Returns:
        None. Signalling failures are logged, never raised.
    """
    pgid = self._pgid
    if pgid is None:
        return

    try:
        self._logger.info("Terminating process group: %s", pgid)
        os.killpg(pgid, signal.SIGTERM)
    except ProcessLookupError:
        # The group is already gone; nothing left to clean up.
        return
    except Exception as e:
        self._logger.warning(
            "Error sending SIGTERM to process group %s: %s", pgid, e
        )
        return

    # Give processes a brief moment to exit gracefully, but stop waiting as
    # soon as the group is empty.
    leader = getattr(self, "proc", None)
    deadline = time.monotonic() + timeout
    while True:
        if leader is not None:
            # Reap the leader if it has exited: an unreaped zombie still
            # counts as a group member and would make the probe below
            # succeed until the deadline.
            leader.poll()
        try:
            # Signal 0 performs the existence/permission check only.
            os.killpg(pgid, 0)
        except ProcessLookupError:
            return
        except OSError:
            # The probe itself failed (e.g. permissions); keep waiting and
            # let the SIGKILL step below report any real problem.
            pass
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(0.05, remaining))

    # Force kill if anything remains
    try:
        os.killpg(pgid, signal.SIGKILL)
    except ProcessLookupError:
        pass
    except Exception as e:
        self._logger.warning(
            "Error sending SIGKILL to process group %s: %s", pgid, e
        )
def
_remove_directory
(
self
,
path
:
str
)
->
None
:
"""Remove a directory."""
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment