Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
8fcc55cf
Unverified
Commit
8fcc55cf
authored
Jul 18, 2025
by
Simo Lin
Committed by
GitHub
Jul 18, 2025
Browse files
[router] router metrics cleanup (#8158)
parent
610381b7
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
378 additions
and
115 deletions
+378
-115
sgl-router/src/lib.rs
sgl-router/src/lib.rs
+2
-2
sgl-router/src/metrics.rs
sgl-router/src/metrics.rs
+324
-0
sgl-router/src/policies/cache_aware.rs
sgl-router/src/policies/cache_aware.rs
+7
-9
sgl-router/src/policies/power_of_two.rs
sgl-router/src/policies/power_of_two.rs
+2
-3
sgl-router/src/prometheus.rs
sgl-router/src/prometheus.rs
+0
-40
sgl-router/src/routers/pd_router.rs
sgl-router/src/routers/pd_router.rs
+21
-19
sgl-router/src/routers/pd_types.rs
sgl-router/src/routers/pd_types.rs
+0
-14
sgl-router/src/routers/router.rs
sgl-router/src/routers/router.rs
+20
-26
sgl-router/src/server.rs
sgl-router/src/server.rs
+2
-2
No files found.
sgl-router/src/lib.rs
View file @
8fcc55cf
...
...
@@ -3,14 +3,14 @@ pub mod config;
pub
mod
logging
;
use
std
::
collections
::
HashMap
;
pub
mod
core
;
pub
mod
metrics
;
pub
mod
openai_api_types
;
pub
mod
policies
;
pub
mod
prometheus
;
pub
mod
routers
;
pub
mod
server
;
pub
mod
service_discovery
;
pub
mod
tree
;
use
crate
::
prometheu
s
::
PrometheusConfig
;
use
crate
::
metric
s
::
PrometheusConfig
;
#[pyclass(eq)]
#[derive(Clone,
PartialEq,
Debug)]
...
...
sgl-router/src/metrics.rs
0 → 100644
View file @
8fcc55cf
use
metrics
::{
counter
,
describe_counter
,
describe_gauge
,
describe_histogram
,
gauge
,
histogram
};
use
metrics_exporter_prometheus
::{
Matcher
,
PrometheusBuilder
};
use
std
::
net
::{
IpAddr
,
Ipv4Addr
,
SocketAddr
};
use
std
::
time
::
Duration
;
#[derive(Debug,
Clone)]
pub
struct
PrometheusConfig
{
pub
port
:
u16
,
pub
host
:
String
,
}
impl
Default
for
PrometheusConfig
{
fn
default
()
->
Self
{
Self
{
port
:
29000
,
host
:
"0.0.0.0"
.to_string
(),
}
}
}
pub
fn
init_metrics
()
{
// Request metrics
describe_counter!
(
"sgl_router_requests_total"
,
"Total number of requests by route and method"
);
describe_histogram!
(
"sgl_router_request_duration_seconds"
,
"Request duration in seconds by route"
);
describe_counter!
(
"sgl_router_request_errors_total"
,
"Total number of request errors by route and error type"
);
describe_counter!
(
"sgl_router_retries_total"
,
"Total number of request retries by route"
);
// Worker metrics
describe_gauge!
(
"sgl_router_active_workers"
,
"Number of currently active workers"
);
describe_gauge!
(
"sgl_router_worker_health"
,
"Worker health status (1=healthy, 0=unhealthy)"
);
describe_gauge!
(
"sgl_router_worker_load"
,
"Current load on each worker"
);
describe_counter!
(
"sgl_router_processed_requests_total"
,
"Total requests processed by each worker"
);
// Policy metrics
describe_counter!
(
"sgl_router_policy_decisions_total"
,
"Total routing policy decisions by policy and worker"
);
describe_counter!
(
"sgl_router_cache_hits_total"
,
"Total cache hits"
);
describe_counter!
(
"sgl_router_cache_misses_total"
,
"Total cache misses"
);
describe_gauge!
(
"sgl_router_tree_size"
,
"Current tree size for cache-aware routing"
);
describe_counter!
(
"sgl_router_load_balancing_events_total"
,
"Total load balancing trigger events"
);
describe_gauge!
(
"sgl_router_max_load"
,
"Maximum worker load"
);
describe_gauge!
(
"sgl_router_min_load"
,
"Minimum worker load"
);
// PD-specific metrics
describe_counter!
(
"sgl_router_pd_requests_total"
,
"Total PD requests by route"
);
describe_counter!
(
"sgl_router_pd_prefill_requests_total"
,
"Total prefill requests per worker"
);
describe_counter!
(
"sgl_router_pd_decode_requests_total"
,
"Total decode requests per worker"
);
describe_counter!
(
"sgl_router_pd_errors_total"
,
"Total PD errors by error type"
);
describe_counter!
(
"sgl_router_pd_prefill_errors_total"
,
"Total prefill server errors"
);
describe_counter!
(
"sgl_router_pd_decode_errors_total"
,
"Total decode server errors"
);
describe_counter!
(
"sgl_router_pd_stream_errors_total"
,
"Total streaming errors per worker"
);
describe_histogram!
(
"sgl_router_pd_request_duration_seconds"
,
"PD request duration by route"
);
// Service discovery metrics
describe_counter!
(
"sgl_router_discovery_updates_total"
,
"Total service discovery update events"
);
describe_gauge!
(
"sgl_router_discovery_workers_added"
,
"Number of workers added in last discovery update"
);
describe_gauge!
(
"sgl_router_discovery_workers_removed"
,
"Number of workers removed in last discovery update"
);
// Generate request specific metrics
describe_histogram!
(
"sgl_router_generate_duration_seconds"
,
"Generate request duration"
);
// Running requests gauge for cache-aware policy
describe_gauge!
(
"sgl_router_running_requests"
,
"Number of running requests per worker"
);
}
pub
fn
start_prometheus
(
config
:
PrometheusConfig
)
{
// Initialize metric descriptions
init_metrics
();
let
duration_matcher
=
Matcher
::
Suffix
(
String
::
from
(
"duration"
));
let
duration_bucket
=
[
0.001
,
0.005
,
0.01
,
0.025
,
0.05
,
0.1
,
0.25
,
0.5
,
1.0
,
2.5
,
5.0
,
10.0
,
15.0
,
30.0
,
45.0
,
60.0
,
90.0
,
120.0
,
180.0
,
240.0
,
];
let
ip_addr
:
IpAddr
=
config
.host
.parse
()
.unwrap_or
(
IpAddr
::
V4
(
Ipv4Addr
::
new
(
0
,
0
,
0
,
0
)));
let
socket_addr
=
SocketAddr
::
new
(
ip_addr
,
config
.port
);
PrometheusBuilder
::
new
()
.with_http_listener
(
socket_addr
)
.upkeep_timeout
(
Duration
::
from_secs
(
5
*
60
))
.set_buckets_for_metric
(
duration_matcher
,
&
duration_bucket
)
.expect
(
"failed to set duration bucket"
)
.install
()
.expect
(
"failed to install Prometheus metrics exporter"
);
}
pub
struct
RouterMetrics
;
impl
RouterMetrics
{
// Request metrics
pub
fn
record_request
(
route
:
&
str
)
{
counter!
(
"sgl_router_requests_total"
,
"route"
=>
route
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_request_duration
(
route
:
&
str
,
duration
:
Duration
)
{
histogram!
(
"sgl_router_request_duration_seconds"
,
"route"
=>
route
.to_string
()
)
.record
(
duration
.as_secs_f64
());
}
pub
fn
record_request_error
(
route
:
&
str
,
error_type
:
&
str
)
{
counter!
(
"sgl_router_request_errors_total"
,
"route"
=>
route
.to_string
(),
"error_type"
=>
error_type
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_retry
(
route
:
&
str
)
{
counter!
(
"sgl_router_retries_total"
,
"route"
=>
route
.to_string
()
)
.increment
(
1
);
}
// Worker metrics
pub
fn
set_active_workers
(
count
:
usize
)
{
gauge!
(
"sgl_router_active_workers"
)
.set
(
count
as
f64
);
}
pub
fn
set_worker_health
(
worker_url
:
&
str
,
healthy
:
bool
)
{
gauge!
(
"sgl_router_worker_health"
,
"worker"
=>
worker_url
.to_string
()
)
.set
(
if
healthy
{
1.0
}
else
{
0.0
});
}
pub
fn
set_worker_load
(
worker_url
:
&
str
,
load
:
usize
)
{
gauge!
(
"sgl_router_worker_load"
,
"worker"
=>
worker_url
.to_string
()
)
.set
(
load
as
f64
);
}
pub
fn
record_processed_request
(
worker_url
:
&
str
)
{
counter!
(
"sgl_router_processed_requests_total"
,
"worker"
=>
worker_url
.to_string
()
)
.increment
(
1
);
}
// Policy metrics
pub
fn
record_policy_decision
(
policy
:
&
str
,
worker
:
&
str
)
{
counter!
(
"sgl_router_policy_decisions_total"
,
"policy"
=>
policy
.to_string
(),
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_cache_hit
()
{
counter!
(
"sgl_router_cache_hits_total"
)
.increment
(
1
);
}
pub
fn
record_cache_miss
()
{
counter!
(
"sgl_router_cache_misses_total"
)
.increment
(
1
);
}
pub
fn
set_tree_size
(
worker
:
&
str
,
size
:
usize
)
{
gauge!
(
"sgl_router_tree_size"
,
"worker"
=>
worker
.to_string
()
)
.set
(
size
as
f64
);
}
pub
fn
record_load_balancing_event
()
{
counter!
(
"sgl_router_load_balancing_events_total"
)
.increment
(
1
);
}
pub
fn
set_load_range
(
max_load
:
usize
,
min_load
:
usize
)
{
gauge!
(
"sgl_router_max_load"
)
.set
(
max_load
as
f64
);
gauge!
(
"sgl_router_min_load"
)
.set
(
min_load
as
f64
);
}
// PD-specific metrics
pub
fn
record_pd_request
(
route
:
&
str
)
{
counter!
(
"sgl_router_pd_requests_total"
,
"route"
=>
route
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_request_duration
(
route
:
&
str
,
duration
:
Duration
)
{
histogram!
(
"sgl_router_pd_request_duration_seconds"
,
"route"
=>
route
.to_string
()
)
.record
(
duration
.as_secs_f64
());
}
pub
fn
record_pd_prefill_request
(
worker
:
&
str
)
{
counter!
(
"sgl_router_pd_prefill_requests_total"
,
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_decode_request
(
worker
:
&
str
)
{
counter!
(
"sgl_router_pd_decode_requests_total"
,
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_error
(
error_type
:
&
str
)
{
counter!
(
"sgl_router_pd_errors_total"
,
"error_type"
=>
error_type
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_prefill_error
(
worker
:
&
str
)
{
counter!
(
"sgl_router_pd_prefill_errors_total"
,
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_decode_error
(
worker
:
&
str
)
{
counter!
(
"sgl_router_pd_decode_errors_total"
,
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
pub
fn
record_pd_stream_error
(
worker
:
&
str
)
{
counter!
(
"sgl_router_pd_stream_errors_total"
,
"worker"
=>
worker
.to_string
()
)
.increment
(
1
);
}
// Service discovery metrics
pub
fn
record_discovery_update
(
added
:
usize
,
removed
:
usize
)
{
counter!
(
"sgl_router_discovery_updates_total"
)
.increment
(
1
);
gauge!
(
"sgl_router_discovery_workers_added"
)
.set
(
added
as
f64
);
gauge!
(
"sgl_router_discovery_workers_removed"
)
.set
(
removed
as
f64
);
}
// Generate request metrics
pub
fn
record_generate_duration
(
duration
:
Duration
)
{
histogram!
(
"sgl_router_generate_duration_seconds"
)
.record
(
duration
.as_secs_f64
());
}
// Running requests for cache-aware policy
pub
fn
set_running_requests
(
worker
:
&
str
,
count
:
usize
)
{
gauge!
(
"sgl_router_running_requests"
,
"worker"
=>
worker
.to_string
()
)
.set
(
count
as
f64
);
}
}
sgl-router/src/policies/cache_aware.rs
View file @
8fcc55cf
...
...
@@ -61,8 +61,8 @@
use
super
::{
get_healthy_worker_indices
,
CacheAwareConfig
,
LoadBalancingPolicy
};
use
crate
::
core
::
Worker
;
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
tree
::
Tree
;
use
metrics
::{
counter
,
gauge
};
use
std
::
sync
::{
Arc
,
Mutex
};
use
std
::
thread
;
use
std
::
time
::
Duration
;
...
...
@@ -171,9 +171,8 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
max_load
,
min_load
,
worker_loads
);
counter!
(
"sgl_router_load_balancing_events_total"
)
.increment
(
1
);
gauge!
(
"sgl_router_max_load"
)
.set
(
max_load
as
f64
);
gauge!
(
"sgl_router_min_load"
)
.set
(
min_load
as
f64
);
RouterMetrics
::
record_load_balancing_event
();
RouterMetrics
::
set_load_range
(
max_load
,
min_load
);
// Use shortest queue when imbalanced
let
min_load_idx
=
healthy_indices
...
...
@@ -183,8 +182,7 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
// Increment processed counter
workers
[
min_load_idx
]
.increment_processed
();
counter!
(
"sgl_router_processed_requests_total"
,
"worker"
=>
workers
[
min_load_idx
]
.url
()
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_processed_request
(
workers
[
min_load_idx
]
.url
());
return
Some
(
min_load_idx
);
}
...
...
@@ -201,10 +199,10 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
};
let
selected_url
=
if
match_rate
>
self
.config.cache_threshold
{
c
ou
n
ter
!
(
"sgl_router_cache_hits_total"
)
.increment
(
1
);
R
outer
Metrics
::
record_cache_hit
(
);
matched_worker
.to_string
()
}
else
{
c
ou
n
ter
!
(
"sgl_router_cache_misses_total"
)
.increment
(
1
);
R
outer
Metrics
::
record_cache_miss
(
);
tree
.get_smallest_tenant
()
};
...
...
@@ -221,7 +219,7 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
// Increment processed counter
workers
[
selected_idx
]
.increment_processed
();
c
ou
n
ter
!
(
"sgl_router
_processed_request
s_total"
,
"worker"
=>
selected_url
)
.increment
(
1
);
R
outer
Metrics
::
record
_processed_request
(
&
selected_url
);
return
Some
(
selected_idx
);
}
...
...
sgl-router/src/policies/power_of_two.rs
View file @
8fcc55cf
...
...
@@ -2,7 +2,7 @@
use
super
::{
get_healthy_worker_indices
,
LoadBalancingPolicy
};
use
crate
::
core
::
Worker
;
use
metrics
::
c
ou
n
ter
;
use
crate
::
metrics
::
R
outer
Metrics
;
use
rand
::
Rng
;
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
RwLock
;
...
...
@@ -89,8 +89,7 @@ impl LoadBalancingPolicy for PowerOfTwoPolicy {
// Increment processed counter
workers
[
selected_idx
]
.increment_processed
();
counter!
(
"sgl_router_processed_requests_total"
,
"worker"
=>
workers
[
selected_idx
]
.url
()
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_processed_request
(
workers
[
selected_idx
]
.url
());
Some
(
selected_idx
)
}
...
...
sgl-router/src/prometheus.rs
deleted
100644 → 0
View file @
610381b7
use
metrics_exporter_prometheus
::{
Matcher
,
PrometheusBuilder
};
use
std
::
net
::{
IpAddr
,
Ipv4Addr
,
SocketAddr
};
use
std
::
time
::
Duration
;
#[derive(Debug,
Clone)]
pub
struct
PrometheusConfig
{
pub
port
:
u16
,
pub
host
:
String
,
}
impl
Default
for
PrometheusConfig
{
fn
default
()
->
Self
{
Self
{
port
:
29000
,
host
:
"0.0.0.0"
.to_string
(),
}
}
}
pub
fn
start_prometheus
(
config
:
PrometheusConfig
)
{
let
duration_matcher
=
Matcher
::
Suffix
(
String
::
from
(
"duration"
));
let
duration_bucket
=
[
0.001
,
0.005
,
0.01
,
0.025
,
0.05
,
0.1
,
0.25
,
0.5
,
1.0
,
2.5
,
5.0
,
10.0
,
15.0
,
30.0
,
45.0
,
60.0
,
90.0
,
120.0
,
180.0
,
240.0
,
];
let
ip_addr
:
IpAddr
=
config
.host
.parse
()
.unwrap_or
(
IpAddr
::
V4
(
Ipv4Addr
::
new
(
0
,
0
,
0
,
0
)));
let
socket_addr
=
SocketAddr
::
new
(
ip_addr
,
config
.port
);
PrometheusBuilder
::
new
()
.with_http_listener
(
socket_addr
)
.upkeep_timeout
(
Duration
::
from_secs
(
5
*
60
))
.set_buckets_for_metric
(
duration_matcher
,
&
duration_bucket
)
.expect
(
"failed to set duration bucket"
)
.install
()
.expect
(
"failed to install Prometheus metrics exporter"
);
}
sgl-router/src/routers/pd_router.rs
View file @
8fcc55cf
...
...
@@ -4,13 +4,13 @@
use
super
::
pd_types
::{
api_path
,
Bootstrap
,
ChatReqInput
,
GenerateReqInput
,
PDRouterError
};
use
super
::
request_adapter
::
ToPdRequest
;
use
crate
::
core
::{
HealthChecker
,
Worker
,
WorkerFactory
,
WorkerLoadGuard
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
use
crate
::
policies
::
LoadBalancingPolicy
;
use
crate
::
tree
::
Tree
;
use
actix_web
::
http
::
header
::{
HeaderValue
,
CONTENT_TYPE
};
use
actix_web
::{
HttpRequest
,
HttpResponse
};
use
futures_util
::{
StreamExt
,
TryStreamExt
};
use
metrics
::{
counter
,
histogram
};
use
serde_json
::
Value
;
use
std
::
collections
::
HashMap
;
use
std
::
sync
::{
Arc
,
Mutex
,
RwLock
};
...
...
@@ -296,7 +296,7 @@ impl PDRouter {
Ok
(
pair
)
=>
pair
,
Err
(
e
)
=>
{
error!
(
"Failed to select PD pair: {}"
,
e
);
c
ou
n
ter
!
(
"sgl_router_pd_errors_total"
,
"error"
=>
"server_selection"
)
.increment
(
1
)
;
R
outer
Metrics
::
record_pd_error
(
"server_selection"
);
return
HttpResponse
::
ServiceUnavailable
()
.body
(
format!
(
"No available servers: {}"
,
e
));
}
...
...
@@ -313,7 +313,7 @@ impl PDRouter {
// Add bootstrap info using the trait method
if
let
Err
(
e
)
=
typed_req
.add_bootstrap_info
(
prefill
.as_ref
())
{
error!
(
"Failed to add bootstrap info: {}"
,
e
);
c
ou
n
ter
!
(
"sgl_router_pd_errors_total"
,
"error"
=>
"bootstrap_injection"
)
.increment
(
1
)
;
R
outer
Metrics
::
record_pd_error
(
"bootstrap_injection"
);
return
HttpResponse
::
InternalServerError
()
.body
(
format!
(
"Bootstrap injection failed: {}"
,
e
));
}
...
...
@@ -374,7 +374,7 @@ impl PDRouter {
Ok
(
pair
)
=>
pair
,
Err
(
e
)
=>
{
error!
(
"Failed to select PD pair: {}"
,
e
);
c
ou
n
ter
!
(
"sgl_router_pd_errors_total"
,
"error"
=>
"server_selection"
)
.increment
(
1
)
;
R
outer
Metrics
::
record_pd_error
(
"server_selection"
);
return
HttpResponse
::
ServiceUnavailable
()
.body
(
format!
(
"No available servers: {}"
,
e
));
}
...
...
@@ -391,7 +391,7 @@ impl PDRouter {
// Add bootstrap info using the trait method
if
let
Err
(
e
)
=
typed_req
.add_bootstrap_info
(
prefill
.as_ref
())
{
error!
(
"Failed to add bootstrap info: {}"
,
e
);
c
ou
n
ter
!
(
"sgl_router_pd_errors_total"
,
"error"
=>
"bootstrap_injection"
)
.increment
(
1
)
;
R
outer
Metrics
::
record_pd_error
(
"bootstrap_injection"
);
return
HttpResponse
::
InternalServerError
()
.body
(
format!
(
"Bootstrap injection failed: {}"
,
e
));
}
...
...
@@ -460,13 +460,10 @@ impl PDRouter {
// Update metrics
let
duration
=
start_time
.elapsed
();
histogram!
(
"sgl_router_pd_request_duration_seconds"
,
"route"
=>
route
.to_string
())
.record
(
duration
.as_secs_f64
());
counter!
(
"sgl_router_pd_requests_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
counter!
(
"sgl_router_pd_prefill_requests_total"
,
"worker"
=>
prefill
.url
()
.to_string
())
.increment
(
1
);
counter!
(
"sgl_router_pd_decode_requests_total"
,
"worker"
=>
decode
.url
()
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_pd_request_duration
(
route
,
duration
);
RouterMetrics
::
record_pd_request
(
route
);
RouterMetrics
::
record_pd_prefill_request
(
prefill
.url
());
RouterMetrics
::
record_pd_decode_request
(
decode
.url
());
// Process decode response
match
decode_result
{
...
...
@@ -475,7 +472,7 @@ impl PDRouter {
.unwrap_or
(
actix_web
::
http
::
StatusCode
::
INTERNAL_SERVER_ERROR
);
if
!
status
.is_success
()
{
c
ou
n
ter
!
(
"sgl_router
_pd_decode_error
s_total"
,
"worker"
=>
decode
.url
()
.to_string
())
.increment
(
1
);
R
outer
Metrics
::
record
_pd_decode_error
(
decode
.url
()
);
error!
(
"Decode server {} returned error status: {}"
,
decode
.url
(),
...
...
@@ -501,7 +498,7 @@ impl PDRouter {
prefill
.url
(),
e
);
c
ou
n
ter
!
(
"sgl_router
_pd_prefill_error
s_total"
,
"worker"
=>
prefill
.url
()
.to_string
())
.increment
(
1
);
R
outer
Metrics
::
record
_pd_prefill_error
(
prefill
.url
()
);
}
if
is_stream
{
...
...
@@ -548,13 +545,19 @@ impl PDRouter {
}
else
{
// No logprob merging needed
HttpResponse
::
build
(
status
)
.insert_header
((
CONTENT_TYPE
,
HeaderValue
::
from_static
(
"text/event-stream"
)))
.insert_header
((
CONTENT_TYPE
,
HeaderValue
::
from_static
(
"text/event-stream"
),
))
.streaming
({
let
decode_url
=
decode
.url
()
.to_string
();
res
.bytes_stream
()
.map_err
(
move
|
e
|
{
error!
(
"Stream error from decode server {}: {}"
,
decode_url
,
e
);
counter!
(
"sgl_router_pd_stream_errors_total"
,
"worker"
=>
decode_url
.to_string
())
.increment
(
1
);
actix_web
::
error
::
ErrorInternalServerError
(
format!
(
"Stream error: {}"
,
e
))
RouterMetrics
::
record_pd_stream_error
(
&
decode_url
);
actix_web
::
error
::
ErrorInternalServerError
(
format!
(
"Stream error: {}"
,
e
))
})
})
}
...
...
@@ -578,8 +581,7 @@ impl PDRouter {
}
Err
(
e
)
=>
{
error!
(
"Decode request failed: {}"
,
e
);
counter!
(
"sgl_router_pd_decode_errors_total"
,
"worker"
=>
decode
.url
()
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_pd_decode_error
(
decode
.url
());
HttpResponse
::
BadGateway
()
.body
(
format!
(
"Decode server error: {}"
,
e
))
}
}
...
...
sgl-router/src/routers/pd_types.rs
View file @
8fcc55cf
...
...
@@ -151,13 +151,6 @@ impl GenerateReqInput {
if
texts
.is_empty
()
{
return
Err
(
"Batch text array is empty"
.to_string
());
}
if
texts
.len
()
>
10000
{
// Reasonable limit for production
return
Err
(
format!
(
"Batch size {} exceeds maximum allowed (10000)"
,
texts
.len
()
));
}
return
Ok
(
Some
(
texts
.len
()));
}
...
...
@@ -166,13 +159,6 @@ impl GenerateReqInput {
if
ids
.is_empty
()
{
return
Err
(
"Batch input_ids array is empty"
.to_string
());
}
if
ids
.len
()
>
10000
{
// Reasonable limit for production
return
Err
(
format!
(
"Batch size {} exceeds maximum allowed (10000)"
,
ids
.len
()
));
}
// Validate each sequence is not empty
for
(
i
,
seq
)
in
ids
.iter
()
.enumerate
()
{
if
seq
.is_empty
()
{
...
...
sgl-router/src/routers/router.rs
View file @
8fcc55cf
use
crate
::
core
::{
HealthChecker
,
Worker
,
WorkerFactory
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
policies
::
LoadBalancingPolicy
;
use
::
metrics
::{
counter
,
gauge
,
histogram
};
use
actix_web
::
http
::
header
::{
HeaderValue
,
CONTENT_TYPE
};
use
actix_web
::{
HttpRequest
,
HttpResponse
};
use
futures_util
::{
StreamExt
,
TryStreamExt
};
...
...
@@ -43,7 +43,7 @@ impl Router {
interval_secs
:
u64
,
)
->
Result
<
Self
,
String
>
{
// Update active workers gauge
gauge!
(
"sgl_router
_active_workers
"
)
.set
(
worker_urls
.len
()
as
f64
);
RouterMetrics
::
set
_active_workers
(
worker_urls
.len
());
// Wait for workers to be healthy (skip if empty - for service discovery mode)
if
!
worker_urls
.is_empty
()
{
...
...
@@ -215,13 +215,11 @@ impl Router {
// Record request metrics
if
route
!=
"/health"
{
let
duration
=
start
.elapsed
();
counter!
(
"sgl_router_requests_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
histogram!
(
"sgl_router_request_duration_seconds"
,
"route"
=>
route
.to_string
())
.record
(
duration
.as_secs_f64
());
RouterMetrics
::
record_request
(
route
);
RouterMetrics
::
record_request_duration
(
route
,
duration
);
if
!
response
.status
()
.is_success
()
{
counter!
(
"sgl_router_request_errors_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_request_error
(
route
,
"request_failed"
);
}
}
response
...
...
@@ -390,7 +388,7 @@ impl Router {
while
request_retries
<
MAX_REQUEST_RETRIES
{
if
total_retries
>=
1
{
info!
(
"Retrying request after {} failed attempts"
,
total_retries
);
c
ou
n
ter
!
(
"sgl_router_retries_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
R
outer
Metrics
::
record_retry
(
route
);
}
// Increment load before request if using RAII load tracking
...
...
@@ -398,8 +396,7 @@ impl Router {
let
workers_guard
=
self
.workers
.read
()
.unwrap
();
if
let
Some
(
worker
)
=
workers_guard
.iter
()
.find
(|
w
|
w
.url
()
==
&
worker_url
)
{
worker
.increment_load
();
gauge!
(
"sgl_router_running_requests"
,
"worker"
=>
worker_url
.to_string
())
.set
(
worker
.load
()
as
f64
);
RouterMetrics
::
set_running_requests
(
&
worker_url
,
worker
.load
());
true
}
else
{
false
...
...
@@ -423,16 +420,14 @@ impl Router {
if
response
.status
()
.is_success
()
{
let
duration
=
start
.elapsed
();
histogram!
(
"sgl_router_generate_duration_seconds"
,
"route"
=>
route
.to_string
())
.record
(
duration
.as_secs_f64
());
RouterMetrics
::
record_generate_duration
(
duration
);
return
response
;
}
else
{
// if the worker is healthy, it means the request is bad, so return the error response
let
health_response
=
self
.send_request
(
client
,
&
worker_url
,
"/health"
,
req
)
.await
;
if
health_response
.status
()
.is_success
()
{
counter!
(
"sgl_router_request_errors_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_request_error
(
route
,
"request_failed"
);
return
response
;
}
}
...
...
@@ -455,7 +450,7 @@ impl Router {
}
}
c
ou
n
ter
!
(
"sgl_router_request_errors_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
R
outer
Metrics
::
record_request_error
(
route
,
"request_failed"
);
HttpResponse
::
InternalServerError
()
.body
(
"All retry attempts failed"
)
}
...
...
@@ -512,8 +507,7 @@ impl Router {
if
let
Ok
(
workers_guard
)
=
self
.workers
.read
()
{
if
let
Some
(
worker
)
=
workers_guard
.iter
()
.find
(|
w
|
w
.url
()
==
worker_url
)
{
worker
.decrement_load
();
gauge!
(
"sgl_router_running_requests"
,
"worker"
=>
worker_url
.to_string
())
.set
(
worker
.load
()
as
f64
);
RouterMetrics
::
set_running_requests
(
&
worker_url
,
worker
.load
());
}
}
}
...
...
@@ -540,17 +534,15 @@ impl Router {
if
let
Ok
(
workers_guard
)
=
self
.workers
.read
()
{
if
let
Some
(
worker
)
=
workers_guard
.iter
()
.find
(|
w
|
w
.url
()
==
worker_url
)
{
worker
.decrement_load
();
gauge!
(
"sgl_router_running_requests"
,
"worker"
=>
worker_url
.to_string
())
.set
(
worker
.load
()
as
f64
);
RouterMetrics
::
set_running_requests
(
&
worker_url
,
worker
.load
());
}
}
}
// Record metrics
let
duration
=
start
.elapsed
();
histogram!
(
"sgl_router_generate_duration_seconds"
,
"route"
=>
route
.to_string
())
.record
(
duration
.as_secs_f64
());
counter!
(
"sgl_router_requests_total"
,
"route"
=>
route
.to_string
())
.increment
(
1
);
RouterMetrics
::
record_generate_duration
(
duration
);
RouterMetrics
::
record_request
(
route
);
response
}
else
if
load_incremented
{
...
...
@@ -577,8 +569,10 @@ impl Router {
workers_guard
.iter
()
.find
(|
w
|
w
.url
()
==
&
worker_url
)
{
worker
.decrement_load
();
gauge!
(
"sgl_router_running_requests"
,
"worker"
=>
worker_url
.to_string
())
.set
(
worker
.load
()
as
f64
);
RouterMetrics
::
set_running_requests
(
&
worker_url
,
worker
.load
(),
);
debug!
(
"Streaming is done!!"
)
}
}
...
...
@@ -626,7 +620,7 @@ impl Router {
info!
(
"Added worker: {}"
,
worker_url
);
let
new_worker
=
WorkerFactory
::
create_regular
(
worker_url
.to_string
());
workers_guard
.push
(
new_worker
);
gauge!
(
"sgl_router
_active_workers
"
)
.set
(
workers_guard
.len
()
as
f64
);
RouterMetrics
::
set
_active_workers
(
workers_guard
.len
());
// If cache aware policy, initialize the worker in the tree
if
let
Some
(
cache_aware
)
=
...
...
@@ -680,7 +674,7 @@ impl Router {
if
let
Some
(
index
)
=
workers_guard
.iter
()
.position
(|
w
|
w
.url
()
==
worker_url
)
{
workers_guard
.remove
(
index
);
info!
(
"Removed worker: {}"
,
worker_url
);
gauge!
(
"sgl_router
_active_workers
"
)
.set
(
workers_guard
.len
()
as
f64
);
RouterMetrics
::
set
_active_workers
(
workers_guard
.len
());
}
else
{
warn!
(
"Worker {} not found, skipping removal"
,
worker_url
);
return
;
...
...
sgl-router/src/server.rs
View file @
8fcc55cf
use
crate
::
config
::
RouterConfig
;
use
crate
::
logging
::{
self
,
LoggingConfig
};
use
crate
::
metrics
::{
self
,
PrometheusConfig
};
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
use
crate
::
prometheus
::{
self
,
PrometheusConfig
};
use
crate
::
routers
::{
RouterFactory
,
RouterTrait
};
use
crate
::
service_discovery
::{
start_service_discovery
,
ServiceDiscoveryConfig
};
use
actix_web
::{
...
...
@@ -237,7 +237,7 @@ pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
"🚧 Initializing Prometheus metrics on {}:{}"
,
prometheus_config
.host
,
prometheus_config
.port
);
prometheu
s
::
start_prometheus
(
prometheus_config
);
metric
s
::
start_prometheus
(
prometheus_config
);
}
else
{
info!
(
"🚧 Prometheus metrics disabled"
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment