Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
ddab4fc7
Unverified
Commit
ddab4fc7
authored
Sep 23, 2025
by
Simo Lin
Committed by
GitHub
Sep 23, 2025
Browse files
[router] fix cache aware routing strategy and lock contention (#10773)
parent
d21c3522
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
93 deletions
+68
-93
.github/workflows/pr-test-pd-router.yml
.github/workflows/pr-test-pd-router.yml
+1
-1
sgl-router/src/policies/cache_aware.rs
sgl-router/src/policies/cache_aware.rs
+67
-92
No files found.
.github/workflows/pr-test-pd-router.yml
View file @
ddab4fc7
...
...
@@ -219,7 +219,7 @@ jobs:
--decode http://127.0.0.7:30007 \
--decode http://127.0.0.8:30008 \
--host 127.0.0.9 \
--log-level warn
ing
\
--log-level warn \
--port 8000 &
ROUTER_PID=$!
...
...
sgl-router/src/policies/cache_aware.rs
View file @
ddab4fc7
...
...
@@ -129,11 +129,14 @@ impl CacheAwarePolicy {
// Use "default" for unknown/empty model_ids for backward compatibility
let
model_id
=
worker
.model_id
();
let
tree_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
"default"
}
else
{
model_id
.to_string
()
model_id
};
model_workers
.entry
(
tree_key
)
.or_default
()
.push
(
worker
);
model_workers
.entry
(
tree_key
.to_string
())
.or_default
()
.push
(
worker
);
}
// Initialize tree for each model
...
...
@@ -153,11 +156,11 @@ impl CacheAwarePolicy {
// use a default tree. This preserves existing behavior for single-model routers.
let
model_id
=
worker
.model_id
();
let
tree_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
"default"
}
else
{
model_id
.to_string
()
model_id
};
let
tree
=
trees
.entry
(
tree_key
)
.or_insert_with
(
Tree
::
new
);
let
tree
=
trees
.entry
(
tree_key
.to_string
()
)
.or_insert_with
(
Tree
::
new
);
tree
.insert
(
""
,
worker
.url
());
}
}
...
...
@@ -176,11 +179,11 @@ impl CacheAwarePolicy {
// Use same logic as add_worker for consistency
let
model_id
=
worker
.model_id
();
let
tree_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
"default"
}
else
{
model_id
.to_string
()
model_id
};
if
let
Some
(
tree
)
=
trees
.get_mut
(
&
tree_key
)
{
if
let
Some
(
tree
)
=
trees
.get_mut
(
tree_key
)
{
tree
.remove_tenant
(
worker
.url
());
}
}
...
...
@@ -222,17 +225,14 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
return
None
;
}
// Group workers by model (using "default" for unknown/empty model_ids)
let
mut
model_workers
:
HashMap
<
String
,
Vec
<
usize
>>
=
HashMap
::
new
();
for
idx
in
&
healthy_indices
{
let
model_id
=
workers
[
*
idx
]
.model_id
();
let
tree_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
}
else
{
model_id
.to_string
()
};
model_workers
.entry
(
tree_key
)
.or_default
()
.push
(
*
idx
);
}
// Determine the model for this set of workers (router pre-filters by model)
// All workers should be from the same model
let
first_model
=
workers
[
healthy_indices
[
0
]]
.model_id
();
let
model_id
=
if
first_model
.is_empty
()
||
first_model
==
"unknown"
{
"default"
}
else
{
first_model
};
// Get current load statistics
let
loads
:
Vec
<
usize
>
=
workers
.iter
()
.map
(|
w
|
w
.load
())
.collect
();
...
...
@@ -267,13 +267,18 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
// Even in imbalanced mode, update the tree to maintain cache state
if
let
Some
(
text
)
=
request_text
{
if
let
Ok
(
mut
trees
)
=
self
.trees
.lock
()
{
let
model_id
=
workers
[
min_load_idx
]
.model_id
();
let
tree
_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
// Avoid allocation if tree already exists
let
tree
=
if
let
Some
(
tree
)
=
trees
.get_mut
(
model_id
)
{
tree
}
else
{
model_id
.to_string
()
// Create new tree and initialize with all workers
let
new_tree
=
Tree
::
new
();
// Initialize with all healthy workers like OLD version does
for
&
idx
in
&
healthy_indices
{
new_tree
.insert
(
""
,
workers
[
idx
]
.url
());
}
trees
.entry
(
model_id
.to_string
())
.or_insert
(
new_tree
)
};
let
tree
=
trees
.entry
(
tree_key
)
.or_insert_with
(
Tree
::
new
);
tree
.insert
(
text
,
workers
[
min_load_idx
]
.url
());
}
}
...
...
@@ -290,84 +295,54 @@ impl LoadBalancingPolicy for CacheAwarePolicy {
let
text
=
request_text
.unwrap_or
(
""
);
if
let
Ok
(
mut
trees
)
=
self
.trees
.lock
()
{
let
mut
best_match_idx
:
Option
<
usize
>
=
None
;
let
mut
best_match_rate
:
f32
=
0.0
;
// Find best match across all models
for
(
model_id
,
worker_indices
)
in
&
model_workers
{
let
tree
=
trees
.entry
(
model_id
.clone
())
.or_insert_with
(
Tree
::
new
);
let
(
matched_text
,
matched_worker
)
=
tree
.prefix_match
(
text
);
let
match_rate
=
if
text
.is_empty
()
{
0.0
}
else
{
matched_text
.chars
()
.count
()
as
f32
/
text
.chars
()
.count
()
as
f32
};
// Check if this model has the best match
if
match_rate
>
best_match_rate
{
// Find the worker index for this URL
if
let
Some
(
idx
)
=
worker_indices
.iter
()
.find
(|
&&
idx
|
workers
[
idx
]
.url
()
==
matched_worker
)
{
best_match_idx
=
Some
(
*
idx
);
best_match_rate
=
match_rate
;
}
// Avoid allocation if tree already exists
let
tree
=
if
let
Some
(
tree
)
=
trees
.get_mut
(
model_id
)
{
tree
}
else
{
// Create new tree and initialize with all workers
let
new_tree
=
Tree
::
new
();
// Initialize with all healthy workers like OLD version does
for
&
idx
in
&
healthy_indices
{
new_tree
.insert
(
""
,
workers
[
idx
]
.url
());
}
}
trees
.entry
(
model_id
.to_string
())
.or_insert
(
new_tree
)
};
let
(
matched_text
,
matched_worker
)
=
tree
.prefix_match
(
text
);
let
match_rate
=
if
text
.is_empty
()
{
0.0
}
else
{
matched_text
.chars
()
.count
()
as
f32
/
text
.chars
()
.count
()
as
f32
};
// Select worker based on cache threshold
let
selected_idx
=
if
let
(
Some
(
idx
),
true
)
=
(
best_match_idx
,
best_match_rate
>
self
.config.cache_threshold
,
)
{
let
selected_url
=
if
match_rate
>
self
.config.cache_threshold
{
RouterMetrics
::
record_cache_hit
();
idx
matched_worker
.to_string
()
}
else
{
RouterMetrics
::
record_cache_miss
();
tree
.get_smallest_tenant
()
};
// Find model with smallest tree (most cache capacity)
let
mut
smallest_tree_model
=
String
::
new
();
let
mut
smallest_tree_size
=
usize
::
MAX
;
// Find the index of the selected worker
if
let
Some
(
selected_idx
)
=
workers
.iter
()
.position
(|
w
|
w
.url
()
==
selected_url
)
{
// Only proceed if the worker is healthy - use direct check like OLD version
if
workers
[
selected_idx
]
.is_healthy
()
{
// Update the tree with this request
tree
.insert
(
text
,
&
selected_url
);
for
model_id
in
model_workers
.keys
()
{
let
tree
=
trees
.entry
(
model_id
.clone
())
.or_insert_with
(
Tree
::
new
);
let
size
=
tree
.get_used_size_per_tenant
()
.values
()
.sum
::
<
usize
>
();
if
size
<
smallest_tree_size
{
smallest_tree_size
=
size
;
smallest_tree_model
=
model_id
.clone
();
}
}
// Increment processed counter
workers
[
selected_idx
]
.increment_processed
();
RouterMetrics
::
record_processed_request
(
&
selected_url
);
// Select least loaded worker from model with most cache capacity
if
let
Some
(
worker_indices
)
=
model_workers
.get
(
&
smallest_tree_model
)
{
worker_indices
.iter
()
.min_by_key
(|
&&
idx
|
workers
[
idx
]
.load
())
.copied
()
.unwrap_or
(
healthy_indices
[
0
])
}
else
{
healthy_indices
[
0
]
return
Some
(
selected_idx
);
}
};
// Update the tree with this request
let
model_id
=
workers
[
selected_idx
]
.model_id
();
let
tree_key
=
if
model_id
.is_empty
()
||
model_id
==
"unknown"
{
"default"
.to_string
()
}
else
{
model_id
.to_string
()
};
let
tree
=
trees
.entry
(
tree_key
)
.or_insert_with
(
Tree
::
new
);
tree
.insert
(
text
,
workers
[
selected_idx
]
.url
());
// Increment processed counter
workers
[
selected_idx
]
.increment_processed
();
RouterMetrics
::
record_processed_request
(
workers
[
selected_idx
]
.url
());
RouterMetrics
::
record_policy_decision
(
self
.name
(),
workers
[
selected_idx
]
.url
());
// Selected worker no longer exists, remove it from tree
tree
.remove_tenant
(
&
selected_url
);
debug!
(
"Removed stale worker {} from cache tree"
,
selected_url
);
}
return
Some
(
selected_idx
);
// Fallback to first healthy worker
return
healthy_indices
.first
()
.copied
();
}
// Fallback to first healthy worker if tree operations fail
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment