Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
eb76a8b5
Unverified
Commit
eb76a8b5
authored
Feb 23, 2026
by
Indrajit Bhosale
Committed by
GitHub
Feb 23, 2026
Browse files
chore: Expose new kv_cache metrics from trtllm backend (#6469)
parent
7893f268
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
31 additions
and
1 deletion
+31
-1
components/src/dynamo/trtllm/publisher.py
components/src/dynamo/trtllm/publisher.py
+14
-0
components/src/dynamo/trtllm/request_handlers/handler_base.py
...onents/src/dynamo/trtllm/request_handlers/handler_base.py
+12
-1
components/src/dynamo/trtllm/workers/llm_worker.py
components/src/dynamo/trtllm/workers/llm_worker.py
+5
-0
No files found.
components/src/dynamo/trtllm/publisher.py
View file @
eb76a8b5
...
@@ -304,6 +304,7 @@ class Publisher:
...
@@ -304,6 +304,7 @@ class Publisher:
component_gauges
:
LLMBackendMetrics
,
component_gauges
:
LLMBackendMetrics
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
enable_local_indexer
:
bool
=
False
,
enable_local_indexer
:
bool
=
False
,
metrics_collector
=
None
,
):
):
self
.
endpoint
=
endpoint
self
.
endpoint
=
endpoint
self
.
engine
=
engine
self
.
engine
=
engine
...
@@ -313,6 +314,7 @@ class Publisher:
...
@@ -313,6 +314,7 @@ class Publisher:
self
.
metrics_labels
=
metrics_labels
self
.
metrics_labels
=
metrics_labels
self
.
component_gauges
=
component_gauges
self
.
component_gauges
=
component_gauges
self
.
enable_local_indexer
=
enable_local_indexer
self
.
enable_local_indexer
=
enable_local_indexer
self
.
metrics_collector
=
metrics_collector
self
.
attention_dp_size
=
engine
.
get_attention_dp_size
()
self
.
attention_dp_size
=
engine
.
get_attention_dp_size
()
# The first few kv events from the model engine are always "created" type events.
# The first few kv events from the model engine are always "created" type events.
...
@@ -482,6 +484,16 @@ class Publisher:
...
@@ -482,6 +484,16 @@ class Publisher:
)
)
self
.
component_gauges
.
set_gpu_cache_usage
(
"0"
,
gpu_cache_usage
)
self
.
component_gauges
.
set_gpu_cache_usage
(
"0"
,
gpu_cache_usage
)
# Log iteration stats to TRT-LLM MetricsCollector (PR #11243)
# This populates trtllm_kv_cache_hit_rate and trtllm_kv_cache_utilization gauges
if
self
.
metrics_collector
and
hasattr
(
self
.
metrics_collector
,
"log_iteration_stats"
):
try
:
self
.
metrics_collector
.
log_iteration_stats
(
stat
)
except
Exception
as
e
:
logging
.
warning
(
f
"Failed to log iteration stats:
{
e
}
"
)
await
self
.
_polling_loop
(
await
self
.
_polling_loop
(
lambda
:
self
.
engine
.
llm
.
get_stats_async
(
timeout
=
_STATS_TIMEOUT_SEC
),
lambda
:
self
.
engine
.
llm
.
get_stats_async
(
timeout
=
_STATS_TIMEOUT_SEC
),
handle_stat
,
handle_stat
,
...
@@ -766,6 +778,7 @@ async def get_publisher(
...
@@ -766,6 +778,7 @@ async def get_publisher(
component_gauges
:
LLMBackendMetrics
,
component_gauges
:
LLMBackendMetrics
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
enable_local_indexer
:
bool
=
False
,
enable_local_indexer
:
bool
=
False
,
metrics_collector
=
None
,
):
):
publisher
=
Publisher
(
publisher
=
Publisher
(
endpoint
,
endpoint
,
...
@@ -776,6 +789,7 @@ async def get_publisher(
...
@@ -776,6 +789,7 @@ async def get_publisher(
component_gauges
=
component_gauges
,
component_gauges
=
component_gauges
,
zmq_endpoint
=
zmq_endpoint
,
zmq_endpoint
=
zmq_endpoint
,
enable_local_indexer
=
enable_local_indexer
,
enable_local_indexer
=
enable_local_indexer
,
metrics_collector
=
metrics_collector
,
)
)
try
:
try
:
publisher
.
initialize
()
publisher
.
initialize
()
...
...
components/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
eb76a8b5
...
@@ -802,13 +802,24 @@ class HandlerBase(BaseGenerativeHandler):
...
@@ -802,13 +802,24 @@ class HandlerBase(BaseGenerativeHandler):
)
)
# Log metrics to TensorRT-LLM MetricsCollector when request finishes
# Log metrics to TensorRT-LLM MetricsCollector when request finishes
# NOTE: TRT-LLM 1.3.0rc5 (PR #11243) renamed log_metrics_dict → log_request_metrics_dict
if
(
if
(
res
.
finished
res
.
finished
and
self
.
metrics_collector
and
self
.
metrics_collector
and
hasattr
(
res
,
"metrics_dict"
)
and
hasattr
(
res
,
"metrics_dict"
)
):
):
try
:
try
:
self
.
metrics_collector
.
log_metrics_dict
(
res
.
metrics_dict
)
if
hasattr
(
self
.
metrics_collector
,
"log_request_metrics_dict"
,
):
self
.
metrics_collector
.
log_request_metrics_dict
(
res
.
metrics_dict
)
else
:
self
.
metrics_collector
.
log_metrics_dict
(
res
.
metrics_dict
)
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
warning
(
f
"Failed to log TensorRT-LLM metrics:
{
e
}
"
)
logging
.
warning
(
f
"Failed to log TensorRT-LLM metrics:
{
e
}
"
)
...
...
components/src/dynamo/trtllm/workers/llm_worker.py
View file @
eb76a8b5
...
@@ -181,6 +181,10 @@ async def init_llm_worker(
...
@@ -181,6 +181,10 @@ async def init_llm_worker(
"max_beam_width"
:
config
.
max_beam_width
,
"max_beam_width"
:
config
.
max_beam_width
,
"max_batch_size"
:
config
.
max_batch_size
,
"max_batch_size"
:
config
.
max_batch_size
,
"return_perf_metrics"
:
config
.
publish_events_and_metrics
,
"return_perf_metrics"
:
config
.
publish_events_and_metrics
,
# enable_iter_perf_stats is required for PyTorch backend to compute iteration-level
# stats (KV cache utilization, hit rate). TensorRT backend always has this enabled.
# See TRT-LLM PR #11243: MetricsCollector.log_iteration_stats() needs these stats.
"enable_iter_perf_stats"
:
config
.
publish_events_and_metrics
,
"kv_connector_config"
:
kv_connector_config
,
"kv_connector_config"
:
kv_connector_config
,
}
}
...
@@ -493,6 +497,7 @@ async def init_llm_worker(
...
@@ -493,6 +497,7 @@ async def init_llm_worker(
component_gauges
=
component_gauges
,
component_gauges
=
component_gauges
,
zmq_endpoint
=
trtllm_zmq_bind_endpoint
,
zmq_endpoint
=
trtllm_zmq_bind_endpoint
,
enable_local_indexer
=
config
.
enable_local_indexer
,
enable_local_indexer
=
config
.
enable_local_indexer
,
metrics_collector
=
metrics_collector
,
)
as
publisher
:
)
as
publisher
:
handler_config
.
publisher
=
publisher
handler_config
.
publisher
=
publisher
handler
=
RequestHandlerFactory
().
get_request_handler
(
handler_config
)
handler
=
RequestHandlerFactory
().
get_request_handler
(
handler_config
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment