Unverified commit 007c5b60, authored by ishandhanani, committed by GitHub
Browse files

fix: sglang metrics and prefill router fix (#5147)

parent e5502e78
...@@ -254,6 +254,7 @@ def setup_prometheus_registry( ...@@ -254,6 +254,7 @@ def setup_prometheus_registry(
endpoint=generate_endpoint, endpoint=generate_endpoint,
registry=registry, registry=registry,
metric_prefix_filters=["sglang:"], metric_prefix_filters=["sglang:"],
add_prefix="sglang_",
) )
return registry return registry
......
...@@ -96,6 +96,21 @@ class PrefillWorkerHandler(BaseWorkerHandler): ...@@ -96,6 +96,21 @@ class PrefillWorkerHandler(BaseWorkerHandler):
bootstrap_room = self._generate_bootstrap_room() bootstrap_room = self._generate_bootstrap_room()
logging.debug(f"Generated bootstrap_room locally: {bootstrap_room}") logging.debug(f"Generated bootstrap_room locally: {bootstrap_room}")
bootstrap_info = {
"bootstrap_host": self.bootstrap_host,
"bootstrap_port": self.bootstrap_port,
"bootstrap_room": bootstrap_room,
}
# Yield bootstrap_info for PrefillRouter - required for async generator contract
# and Rust-side expects disaggregated_params in first output
yield {
"token_ids": [],
"text": None,
"finish_reason": None,
"disaggregated_params": bootstrap_info,
}
input_param = self._get_input_param(inner_request) input_param = self._get_input_param(inner_request)
# Propagate trace context to SGLang # Propagate trace context to SGLang
......
...@@ -559,8 +559,8 @@ class MetricsPayload(BasePayload): ...@@ -559,8 +559,8 @@ class MetricsPayload(BasePayload):
metrics_to_check.append( metrics_to_check.append(
MetricCheck( MetricCheck(
# Check: Minimum count of unique sglang:* metrics # Check: Minimum count of unique sglang:* metrics
name="sglang:*", name="sglang_*",
pattern=lambda name: r"^sglang:\w+", pattern=lambda name: r"^sglang_\w+",
validator=lambda value: len(set(value)) validator=lambda value: len(set(value))
>= 20, # 80% of typical ~25 sglang metrics (excluding _bucket) as of 2025-10-22 (but will grow) >= 20, # 80% of typical ~25 sglang metrics (excluding _bucket) as of 2025-10-22 (but will grow)
error_msg=lambda name, value: f"Expected at least 20 unique sglang:* metrics, but found only {len(set(value))}", error_msg=lambda name, value: f"Expected at least 20 unique sglang:* metrics, but found only {len(set(value))}",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment