Unverified commit 55dfb539, authored by Lianmin Zheng, committed by GitHub
Browse files

[Auto Sync] Update scheduler_metrics_mixin.py, collector.py (20251104) (#12647)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
parent 42889acb
......@@ -164,6 +164,8 @@ class SchedulerMetricsMixin:
self.stats.token_usage = token_usage
if self.is_hybrid:
self.stats.swa_token_usage = swa_token_usage
if self.is_hybrid_gdn:
self.stats.mamba_usage = mamba_usage
self.stats.num_queue_reqs = len(self.waiting_queue)
self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
self.stats.cache_hit_rate = cache_hit_rate
......@@ -306,6 +308,8 @@ class SchedulerMetricsMixin:
self.stats.token_usage = token_usage
if self.is_hybrid:
self.stats.swa_token_usage = swa_token_usage
if self.is_hybrid_gdn:
self.stats.mamba_usage = mamba_usage
self.stats.gen_throughput = self.last_gen_throughput
self.stats.num_queue_reqs = len(self.waiting_queue)
self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
......
......@@ -150,6 +150,7 @@ class SchedulerStats:
token_usage: float = 0.0
pending_prealloc_token_usage: float = 0.0
swa_token_usage: float = 0.0
mamba_usage: float = 0.0
gen_throughput: float = 0.0
num_queue_reqs: int = 0
num_grammar_queue_reqs: int = 0
......@@ -225,6 +226,12 @@ class SchedulerMetricsCollector:
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.mamba_usage = Gauge(
name="sglang:mamba_usage",
documentation="The token usage for Mamba layers.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.gen_throughput = Gauge(
name="sglang:gen_throughput",
documentation="The generation throughput (token/s).",
......@@ -581,6 +588,7 @@ class SchedulerMetricsCollector:
self.pending_prealloc_token_usage, stats.pending_prealloc_token_usage
)
self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
self._log_gauge(self.mamba_usage, stats.mamba_usage)
self._log_gauge(self.gen_throughput, stats.gen_throughput)
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
self._log_gauge(self.num_grammar_queue_reqs, stats.num_grammar_queue_reqs)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment