Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a8e5328e
Unverified
Commit
a8e5328e
authored
Nov 19, 2025
by
Indrajit Bhosale
Committed by
GitHub
Nov 19, 2025
Browse files
ci: TRT-LLM multimodal CI (#4118)
Signed-off-by:
Indrajit Bhosale
<
iamindrajitb@gmail.com
>
parent
0b8b7ffb
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
100 additions
and
1 deletion
+100
-1
examples/backends/trtllm/launch/disagg_multimodal.sh
examples/backends/trtllm/launch/disagg_multimodal.sh
+44
-0
pyproject.toml
pyproject.toml
+1
-0
tests/README.md
tests/README.md
+1
-1
tests/serve/test_trtllm.py
tests/serve/test_trtllm.py
+12
-0
tests/utils/payload_builder.py
tests/utils/payload_builder.py
+42
-0
No files found.
examples/backends/trtllm/launch/disagg_multimodal.sh
0 → 100755
View file @
a8e5328e
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Launches a disaggregated multimodal TRT-LLM deployment:
#   1. the dynamo frontend on HTTP port 8000 (background),
#   2. a prefill worker (background),
#   3. a decode worker (foreground; the script lives as long as it does).
# Every setting below can be overridden from the caller's environment.

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml"}
export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
export MODALITY=${MODALITY:-"multimodal"}

# Setup cleanup trap
cleanup() {
    # Disarm the traps first. The handler is registered for EXIT as well as
    # INT/TERM, so without this a signal would run cleanup once for the signal
    # and a second time when the script subsequently exits.
    trap - EXIT INT TERM

    echo "Cleaning up background processes..."
    # PIDs are intentionally unquoted: if a process was never started its
    # variable is empty and simply disappears from the argument list.
    kill $DYNAMO_PID $PREFILL_PID 2>/dev/null || true
    wait $DYNAMO_PID $PREFILL_PID 2>/dev/null || true
    echo "Cleanup complete."
}
trap cleanup EXIT INT TERM

# run frontend
python3 -m dynamo.frontend --http-port 8000 &
DYNAMO_PID=$!

# run prefill worker
CUDA_VISIBLE_DEVICES="$PREFILL_CUDA_VISIBLE_DEVICES" python3 -m dynamo.trtllm \
    --model-path "$MODEL_PATH" \
    --served-model-name "$SERVED_MODEL_NAME" \
    --extra-engine-args "$PREFILL_ENGINE_ARGS" \
    --modality "$MODALITY" \
    --disaggregation-mode prefill &
PREFILL_PID=$!

# run decode worker (foreground: its exit ends the script and fires the EXIT trap)
CUDA_VISIBLE_DEVICES="$DECODE_CUDA_VISIBLE_DEVICES" python3 -m dynamo.trtllm \
    --model-path "$MODEL_PATH" \
    --served-model-name "$SERVED_MODEL_NAME" \
    --extra-engine-args "$DECODE_ENGINE_ARGS" \
    --modality "$MODALITY" \
    --disaggregation-mode decode
pyproject.toml
View file @
a8e5328e
...
...
@@ -196,6 +196,7 @@ markers = [
"trtllm: marks tests as requiring trtllm"
,
"trtllm_marker: marks tests as requiring trtllm"
,
"sglang: marks tests as requiring sglang"
,
"multimodal: marks tests as multimodal (image/video) tests"
,
"slow: marks tests as known to be slow"
,
"h100: marks tests to run on H100"
,
"kvbm: marks tests for KV behavior and model determinism"
,
...
...
tests/README.md
View file @
a8e5328e
...
...
@@ -68,7 +68,7 @@ Markers are required for all tests. They are used for test selection in CI and l
| Category | Marker(s) | Description |
|-------------------------|--------------------------|------------------------------------|
| Lifecycle [required] | pre_merge, post_merge, nightly, weekly, release | When the test should run |
| Test Type [required] | unit, integration, e2e, benchmark, stress | Nature of the test |
| Test Type [required] | unit, integration, e2e, benchmark, stress, multimodal | Nature of the test |
| Hardware [required] | gpu_0, gpu_1, gpu_2, gpu_4, gpu_8, h100 | Number/type of GPUs required |
| Component/Framework | vllm, trtllm, sglang, kvbm, planner, router | Backend or component specificity |
| Other | slow, skip, xfail | Special handling |
...
...
tests/serve/test_trtllm.py
View file @
a8e5328e
...
...
@@ -17,6 +17,7 @@ from tests.utils.payload_builder import (
chat_payload_default
,
completion_payload_default
,
metric_payload_default
,
multimodal_payload_default
,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
...
...
@@ -105,6 +106,17 @@ trtllm_configs = {
completion_payload_default
(),
],
),
"disaggregated_multimodal"
:
TRTLLMConfig
(
name
=
"disaggregated_multimodal"
,
directory
=
trtllm_dir
,
script_name
=
"disagg_multimodal.sh"
,
marks
=
[
pytest
.
mark
.
gpu_2
,
pytest
.
mark
.
trtllm_marker
,
pytest
.
mark
.
multimodal
],
model
=
"Qwen/Qwen2-VL-7B-Instruct"
,
models_port
=
8000
,
timeout
=
900
,
delayed_start
=
60
,
request_payloads
=
[
multimodal_payload_default
()],
),
}
...
...
tests/utils/payload_builder.py
View file @
a8e5328e
...
...
@@ -66,6 +66,48 @@ def completion_payload_default(
)
def multimodal_payload_default(
    image_url: str = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
    text: str = "Describe the image",
    repeat_count: int = 1,
    expected_response: Optional[List[str]] = None,
    expected_log: Optional[List[str]] = None,
    max_tokens: int = 160,
    temperature: Optional[float] = None,
    stream: bool = False,
) -> ChatPayload:
    """Build the default chat payload that pairs a text prompt with an image.

    The payload content is a two-element list: a ``text`` part carrying
    ``text`` followed by an ``image_url`` part carrying ``image_url``. All
    remaining arguments are handed to ``chat_payload`` unchanged, apart from
    the fallbacks noted below.

    Args:
        image_url: URL placed in the ``image_url`` content part.
        text: Prompt placed in the ``text`` content part.
        repeat_count: Forwarded to ``chat_payload`` as ``repeat_count``.
        expected_response: Strings expected in the response. ``None`` or an
            empty list falls back to ``["image"]``.
        expected_log: Patterns expected in the logs. ``None`` or an empty
            list falls back to ``[]``.
        max_tokens: Forwarded to ``chat_payload`` as ``max_tokens``.
        temperature: Forwarded to ``chat_payload`` as ``temperature``.
        stream: Forwarded to ``chat_payload`` as ``stream``.

    Returns:
        The ``ChatPayload`` produced by ``chat_payload``.
    """
    text_part = {"type": "text", "text": text}
    image_part = {"type": "image_url", "image_url": {"url": image_url}}

    # Truthiness (not an ``is None`` check) decides the fallback, so an
    # explicitly passed empty list is also replaced by the default.
    response_checks = expected_response if expected_response else ["image"]
    log_checks = expected_log if expected_log else []

    return chat_payload(
        content=[text_part, image_part],
        repeat_count=repeat_count,
        expected_response=response_checks,
        expected_log=log_checks,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=stream,
    )
def
metric_payload_default
(
min_num_requests
:
int
,
repeat_count
:
int
=
1
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment