Unverified commit b98cf398, authored by Lianmin Zheng and committed by GitHub
Browse files

[Auto Sync] Update collector.py (20251014) (#11625)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
parent 27d71045
......@@ -118,6 +118,7 @@ class SchedulerStats:
num_running_reqs: int = 0
num_used_tokens: int = 0
token_usage: float = 0.0
pending_prealloc_token_usage: float = 0.0
swa_token_usage: float = 0.0
gen_throughput: float = 0.0
num_queue_reqs: int = 0
......@@ -177,6 +178,12 @@ class SchedulerMetricsCollector:
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.pending_prealloc_token_usage = Gauge(
name="sglang:pending_prealloc_token_usage",
documentation="The token usage for pending preallocated tokens (not preallocated yet).",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.swa_token_usage = Gauge(
name="sglang:swa_token_usage",
documentation="The token usage for SWA layers.",
......@@ -516,6 +523,9 @@ class SchedulerMetricsCollector:
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
self._log_gauge(self.token_usage, stats.token_usage)
self._log_gauge(
self.pending_prealloc_token_usage, stats.pending_prealloc_token_usage
)
self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
self._log_gauge(self.gen_throughput, stats.gen_throughput)
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment