Unverified commit 21ca4c3a, authored by Yingchun Lai and committed by GitHub
Browse files

[PD metrics] Fix some uncompleted PD related metrics (#8627)

parent e3cf812f
...@@ -1513,6 +1513,20 @@ class Scheduler( ...@@ -1513,6 +1513,20 @@ class Scheduler(
self.stats.gen_throughput = 0 self.stats.gen_throughput = 0
self.stats.num_queue_reqs = len(self.waiting_queue) self.stats.num_queue_reqs = len(self.waiting_queue)
self.stats.num_grammar_queue_reqs = len(self.grammar_queue) self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
if self.disaggregation_mode == DisaggregationMode.PREFILL:
self.stats.num_prefill_prealloc_queue_reqs = len(
self.disagg_prefill_bootstrap_queue.queue
)
self.stats.num_prefill_inflight_queue_reqs = len(
self.disagg_prefill_inflight_queue
)
if self.disaggregation_mode == DisaggregationMode.DECODE:
self.stats.num_decode_prealloc_queue_reqs = len(
self.disagg_decode_prealloc_queue.queue
)
self.stats.num_decode_transfer_queue_reqs = len(
self.disagg_decode_transfer_queue.queue
)
self.metrics_collector.log_stats(self.stats) self.metrics_collector.log_stats(self.stats)
self._publish_kv_events() self._publish_kv_events()
......
...@@ -230,7 +230,7 @@ class SchedulerMetricsMixin: ...@@ -230,7 +230,7 @@ class SchedulerMetricsMixin:
self.stats.num_grammar_queue_reqs = len(self.grammar_queue) self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
self.stats.spec_accept_length = spec_accept_length self.stats.spec_accept_length = spec_accept_length
self.stats.total_retracted_reqs = self.total_retracted_reqs self.stats.total_retracted_reqs = self.total_retracted_reqs
self.metrics_collector.log_stats(self.stats) self.stats.avg_request_queue_latency = 0.0
if self.disaggregation_mode == DisaggregationMode.DECODE: if self.disaggregation_mode == DisaggregationMode.DECODE:
self.stats.num_decode_prealloc_queue_reqs = len( self.stats.num_decode_prealloc_queue_reqs = len(
self.disagg_decode_prealloc_queue.queue self.disagg_decode_prealloc_queue.queue
...@@ -238,6 +238,7 @@ class SchedulerMetricsMixin: ...@@ -238,6 +238,7 @@ class SchedulerMetricsMixin:
self.stats.num_decode_transfer_queue_reqs = len( self.stats.num_decode_transfer_queue_reqs = len(
self.disagg_decode_transfer_queue.queue self.disagg_decode_transfer_queue.queue
) )
self.metrics_collector.log_stats(self.stats)
self._emit_kv_metrics() self._emit_kv_metrics()
self._publish_kv_events() self._publish_kv_events()
......
...@@ -539,6 +539,7 @@ class SchedulerMetricsCollector: ...@@ -539,6 +539,7 @@ class SchedulerMetricsCollector:
self.num_running_reqs_offline_batch, stats.num_running_reqs_offline_batch self.num_running_reqs_offline_batch, stats.num_running_reqs_offline_batch
) )
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate) self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
self._log_gauge(self.avg_request_queue_latency, stats.avg_request_queue_latency)
# Speculative decoding # Speculative decoding
self._log_gauge(self.spec_accept_length, stats.spec_accept_length) self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment