Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
66dfc494
"vscode:/vscode.git/clone" did not exist on "44cfbb6880309be1eb0bc1f35dcd1da574a71b51"
Unverified
Commit
66dfc494
authored
Jan 07, 2026
by
Indrajit Bhosale
Committed by
GitHub
Jan 07, 2026
Browse files
fix: KvCacheConfig Settings Lost When Publishing Events (#5198)
parent
996077f4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
57 additions
and
7 deletions
+57
-7
components/src/dynamo/trtllm/main.py
components/src/dynamo/trtllm/main.py
+6
-6
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/agg.yaml
...kends/trtllm/engine_configs/qwen2-vl-7b-instruct/agg.yaml
+1
-1
examples/backends/trtllm/launch/agg_multimodal.sh
examples/backends/trtllm/launch/agg_multimodal.sh
+34
-0
tests/serve/test_trtllm.py
tests/serve/test_trtllm.py
+16
-0
No files found.
components/src/dynamo/trtllm/main.py
View file @
66dfc494
...
@@ -208,14 +208,14 @@ async def init(runtime: DistributedRuntime, config: Config):
...
@@ -208,14 +208,14 @@ async def init(runtime: DistributedRuntime, config: Config):
if
config
.
publish_events_and_metrics
:
if
config
.
publish_events_and_metrics
:
# 'event_buffer_max_size' is required to enable TRTLLM to publish kv cache events.
# 'event_buffer_max_size' is required to enable TRTLLM to publish kv cache events.
# Add it to kv_cache_config while preserving
cache_transceiver_config
from YAML
# Add it to kv_cache_config while preserving
all settings
from YAML
current_kv_config
=
arg_map
[
"kv_cache_config"
]
current_kv_config
=
arg_map
[
"kv_cache_config"
]
if
isinstance
(
current_kv_config
,
KvCacheConfig
):
if
isinstance
(
current_kv_config
,
KvCacheConfig
):
# Convert KvCacheConfig object to dict
(no cache_transceiver_config to preserve)
# Convert KvCacheConfig object to dict
, preserving ALL existing settings
arg_map
[
"kv_cache_config"
]
=
{
# This ensures YAML overrides are not lost when adding event_buffer_max_size
"free_gpu_memory_fraction"
:
config
.
free_gpu_memory_fraction
,
kv_config_dict
=
current_kv_config
.
model_dump
(
exclude_none
=
True
)
"event_buffer_max_size"
:
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
,
kv_config_dict
[
"event_buffer_max_size"
]
=
DEFAULT_KV_EVENT_BUFFER_MAX_SIZE
}
arg_map
[
"kv_cache_config"
]
=
kv_config_dict
elif
isinstance
(
current_kv_config
,
dict
):
elif
isinstance
(
current_kv_config
,
dict
):
# Add event_buffer_max_size while preserving cache_transceiver_config and other YAML settings
# Add event_buffer_max_size while preserving cache_transceiver_config and other YAML settings
current_kv_config
[
current_kv_config
[
...
...
examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/agg.yaml
View file @
66dfc494
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
tensor_parallel_size
:
8
tensor_parallel_size
:
1
moe_expert_parallel_size
:
1
moe_expert_parallel_size
:
1
enable_attention_dp
:
false
enable_attention_dp
:
false
max_num_tokens
:
4096
max_num_tokens
:
4096
...
...
examples/backends/trtllm/launch/agg_multimodal.sh
0 → 100755
View file @
66dfc494
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Environment variables with defaults
export
DYNAMO_HOME
=
${
DYNAMO_HOME
:-
"/workspace"
}
export
MODEL_PATH
=
${
MODEL_PATH
:-
"Qwen/Qwen2-VL-7B-Instruct"
}
export
SERVED_MODEL_NAME
=
${
SERVED_MODEL_NAME
:-
"Qwen/Qwen2-VL-7B-Instruct"
}
export
AGG_ENGINE_ARGS
=
${
AGG_ENGINE_ARGS
:-
"
$DYNAMO_HOME
/examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/agg.yaml"
}
export
MODALITY
=
${
MODALITY
:-
"multimodal"
}
# Setup cleanup trap
cleanup
()
{
echo
"Cleaning up background processes..."
kill
$DYNAMO_PID
2>/dev/null
||
true
wait
$DYNAMO_PID
2>/dev/null
||
true
echo
"Cleanup complete."
}
trap
cleanup EXIT INT TERM
# run frontend
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python3
-m
dynamo.frontend
--router-mode
kv &
DYNAMO_PID
=
$!
# run worker
python3
-m
dynamo.trtllm
\
--model-path
"
$MODEL_PATH
"
\
--served-model-name
"
$SERVED_MODEL_NAME
"
\
--extra-engine-args
"
$AGG_ENGINE_ARGS
"
\
--modality
"
$MODALITY
"
\
--publish-events-and-metrics
tests/serve/test_trtllm.py
View file @
66dfc494
...
@@ -183,6 +183,22 @@ trtllm_configs = {
...
@@ -183,6 +183,22 @@ trtllm_configs = {
delayed_start
=
60
,
delayed_start
=
60
,
request_payloads
=
[
multimodal_payload_default
()],
request_payloads
=
[
multimodal_payload_default
()],
),
),
"aggregated_multimodal_router"
:
TRTLLMConfig
(
name
=
"aggregated_multimodal_router"
,
directory
=
trtllm_dir
,
script_name
=
"agg_multimodal.sh"
,
marks
=
[
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
trtllm
,
pytest
.
mark
.
multimodal
,
pytest
.
mark
.
nightly
,
],
model
=
"Qwen/Qwen2-VL-7B-Instruct"
,
frontend_port
=
DefaultPort
.
FRONTEND
.
value
,
timeout
=
900
,
delayed_start
=
60
,
request_payloads
=
[
multimodal_payload_default
()],
),
"completions_only"
:
TRTLLMConfig
(
"completions_only"
:
TRTLLMConfig
(
name
=
"completions_only"
,
name
=
"completions_only"
,
directory
=
trtllm_dir
,
directory
=
trtllm_dir
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment