Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
eb76a8b5
"lib/llm/vscode:/vscode.git/clone" did not exist on "7af49a15b2786e64de5c2cd08e1ebb38d7505b47"
Unverified
Commit
eb76a8b5
authored
Feb 23, 2026
by
Indrajit Bhosale
Committed by
GitHub
Feb 23, 2026
Browse files
chore: Expose new kv_cache metrics from trtllm backend (#6469)
parent
7893f268
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
31 additions
and
1 deletion
+31
-1
components/src/dynamo/trtllm/publisher.py
components/src/dynamo/trtllm/publisher.py
+14
-0
components/src/dynamo/trtllm/request_handlers/handler_base.py
...onents/src/dynamo/trtllm/request_handlers/handler_base.py
+12
-1
components/src/dynamo/trtllm/workers/llm_worker.py
components/src/dynamo/trtllm/workers/llm_worker.py
+5
-0
No files found.
components/src/dynamo/trtllm/publisher.py
View file @
eb76a8b5
...
@@ -304,6 +304,7 @@ class Publisher:
...
@@ -304,6 +304,7 @@ class Publisher:
component_gauges
:
LLMBackendMetrics
,
component_gauges
:
LLMBackendMetrics
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
enable_local_indexer
:
bool
=
False
,
enable_local_indexer
:
bool
=
False
,
metrics_collector
=
None
,
):
):
self
.
endpoint
=
endpoint
self
.
endpoint
=
endpoint
self
.
engine
=
engine
self
.
engine
=
engine
...
@@ -313,6 +314,7 @@ class Publisher:
...
@@ -313,6 +314,7 @@ class Publisher:
self
.
metrics_labels
=
metrics_labels
self
.
metrics_labels
=
metrics_labels
self
.
component_gauges
=
component_gauges
self
.
component_gauges
=
component_gauges
self
.
enable_local_indexer
=
enable_local_indexer
self
.
enable_local_indexer
=
enable_local_indexer
self
.
metrics_collector
=
metrics_collector
self
.
attention_dp_size
=
engine
.
get_attention_dp_size
()
self
.
attention_dp_size
=
engine
.
get_attention_dp_size
()
# The first few kv events from the model engine are always "created" type events.
# The first few kv events from the model engine are always "created" type events.
...
@@ -482,6 +484,16 @@ class Publisher:
...
@@ -482,6 +484,16 @@ class Publisher:
)
)
self
.
component_gauges
.
set_gpu_cache_usage
(
"0"
,
gpu_cache_usage
)
self
.
component_gauges
.
set_gpu_cache_usage
(
"0"
,
gpu_cache_usage
)
# Log iteration stats to TRT-LLM MetricsCollector (PR #11243)
# This populates trtllm_kv_cache_hit_rate and trtllm_kv_cache_utilization gauges
if
self
.
metrics_collector
and
hasattr
(
self
.
metrics_collector
,
"log_iteration_stats"
):
try
:
self
.
metrics_collector
.
log_iteration_stats
(
stat
)
except
Exception
as
e
:
logging
.
warning
(
f
"Failed to log iteration stats:
{
e
}
"
)
await
self
.
_polling_loop
(
await
self
.
_polling_loop
(
lambda
:
self
.
engine
.
llm
.
get_stats_async
(
timeout
=
_STATS_TIMEOUT_SEC
),
lambda
:
self
.
engine
.
llm
.
get_stats_async
(
timeout
=
_STATS_TIMEOUT_SEC
),
handle_stat
,
handle_stat
,
...
@@ -766,6 +778,7 @@ async def get_publisher(
...
@@ -766,6 +778,7 @@ async def get_publisher(
component_gauges
:
LLMBackendMetrics
,
component_gauges
:
LLMBackendMetrics
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
zmq_endpoint
:
Optional
[
str
]
=
None
,
enable_local_indexer
:
bool
=
False
,
enable_local_indexer
:
bool
=
False
,
metrics_collector
=
None
,
):
):
publisher
=
Publisher
(
publisher
=
Publisher
(
endpoint
,
endpoint
,
...
@@ -776,6 +789,7 @@ async def get_publisher(
...
@@ -776,6 +789,7 @@ async def get_publisher(
component_gauges
=
component_gauges
,
component_gauges
=
component_gauges
,
zmq_endpoint
=
zmq_endpoint
,
zmq_endpoint
=
zmq_endpoint
,
enable_local_indexer
=
enable_local_indexer
,
enable_local_indexer
=
enable_local_indexer
,
metrics_collector
=
metrics_collector
,
)
)
try
:
try
:
publisher
.
initialize
()
publisher
.
initialize
()
...
...
components/src/dynamo/trtllm/request_handlers/handler_base.py
View file @
eb76a8b5
...
@@ -802,13 +802,24 @@ class HandlerBase(BaseGenerativeHandler):
...
@@ -802,13 +802,24 @@ class HandlerBase(BaseGenerativeHandler):
)
)
# Log metrics to TensorRT-LLM MetricsCollector when request finishes
# Log metrics to TensorRT-LLM MetricsCollector when request finishes
# NOTE: TRT-LLM 1.3.0rc5 (PR #11243) renamed log_metrics_dict → log_request_metrics_dict
if
(
if
(
res
.
finished
res
.
finished
and
self
.
metrics_collector
and
self
.
metrics_collector
and
hasattr
(
res
,
"metrics_dict"
)
and
hasattr
(
res
,
"metrics_dict"
)
):
):
try
:
try
:
self
.
metrics_collector
.
log_metrics_dict
(
res
.
metrics_dict
)
if
hasattr
(
self
.
metrics_collector
,
"log_request_metrics_dict"
,
):
self
.
metrics_collector
.
log_request_metrics_dict
(
res
.
metrics_dict
)
else
:
self
.
metrics_collector
.
log_metrics_dict
(
res
.
metrics_dict
)
except
Exception
as
e
:
except
Exception
as
e
:
logging
.
warning
(
f
"Failed to log TensorRT-LLM metrics:
{
e
}
"
)
logging
.
warning
(
f
"Failed to log TensorRT-LLM metrics:
{
e
}
"
)
...
...
components/src/dynamo/trtllm/workers/llm_worker.py
View file @
eb76a8b5
...
@@ -181,6 +181,10 @@ async def init_llm_worker(
...
@@ -181,6 +181,10 @@ async def init_llm_worker(
"max_beam_width"
:
config
.
max_beam_width
,
"max_beam_width"
:
config
.
max_beam_width
,
"max_batch_size"
:
config
.
max_batch_size
,
"max_batch_size"
:
config
.
max_batch_size
,
"return_perf_metrics"
:
config
.
publish_events_and_metrics
,
"return_perf_metrics"
:
config
.
publish_events_and_metrics
,
# enable_iter_perf_stats is required for PyTorch backend to compute iteration-level
# stats (KV cache utilization, hit rate). TensorRT backend always has this enabled.
# See TRT-LLM PR #11243: MetricsCollector.log_iteration_stats() needs these stats.
"enable_iter_perf_stats"
:
config
.
publish_events_and_metrics
,
"kv_connector_config"
:
kv_connector_config
,
"kv_connector_config"
:
kv_connector_config
,
}
}
...
@@ -493,6 +497,7 @@ async def init_llm_worker(
...
@@ -493,6 +497,7 @@ async def init_llm_worker(
component_gauges
=
component_gauges
,
component_gauges
=
component_gauges
,
zmq_endpoint
=
trtllm_zmq_bind_endpoint
,
zmq_endpoint
=
trtllm_zmq_bind_endpoint
,
enable_local_indexer
=
config
.
enable_local_indexer
,
enable_local_indexer
=
config
.
enable_local_indexer
,
metrics_collector
=
metrics_collector
,
)
as
publisher
:
)
as
publisher
:
handler_config
.
publisher
=
publisher
handler_config
.
publisher
=
publisher
handler
=
RequestHandlerFactory
().
get_request_handler
(
handler_config
)
handler
=
RequestHandlerFactory
().
get_request_handler
(
handler_config
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment