Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
db79f324
Unverified
Commit
db79f324
authored
Mar 24, 2026
by
Jacky
Committed by
GitHub
Mar 24, 2026
Browse files
fix: Request Rejection busy signal passage (#7615)
Signed-off-by:
Jacky
<
18255193+kthui@users.noreply.github.com
>
parent
b6317af6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
7 deletions
+16
-7
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+11
-1
tests/router/common.py
tests/router/common.py
+5
-5
tests/router/test_router_e2e_with_mockers.py
tests/router/test_router_e2e_with_mockers.py
+0
-1
No files found.
lib/llm/src/discovery/watcher.rs
View file @
db79f324
...
...
@@ -502,8 +502,18 @@ impl ModelWatcher {
// monitor (1-to-1) since each monitor is scoped to this WorkerSet's Client/namespace.
// The monitor tracks Prometheus metrics (active_decode_blocks, active_prefill_tokens,
// worker TTFT/ITL cleanup). The thresholds control busy detection behavior only.
//
// IMPORTANT: When KV routing is active, the monitor must use the KvRouter's Client
// so that busy-state updates (via update_free_instances) are visible to the
// PushRouter, which also uses the KvRouter's Client (see common.rs:258-263).
// Using a different Client instance would cause the PushRouter to never see
// busy workers, since each Client::new() creates independent ArcSwap state.
let
monitor_client
=
kv_chooser
.as_ref
()
.map
(|
chooser
|
chooser
.client
()
.clone
())
.unwrap_or_else
(||
client
.clone
());
let
worker_monitor
=
Some
(
KvWorkerMonitor
::
new
(
client
.clone
()
,
monitor_client
,
self
.router_config.load_threshold_config
.clone
(),
));
...
...
tests/router/common.py
View file @
db79f324
...
...
@@ -677,7 +677,7 @@ def _test_router_overload_503(
)
# Wait briefly to ensure requests are in-flight
await
asyncio
.
sleep
(
0.
2
)
await
asyncio
.
sleep
(
0.
8
)
# Now send one more request that should get 503
logger
.
info
(
"Sending additional request that should receive 503..."
)
...
...
@@ -687,10 +687,10 @@ def _test_router_overload_503(
if
status_code
==
503
:
body
=
await
response
.
json
()
logger
.
info
(
f
"Got expected 503 response:
{
body
}
"
)
assert
"Service temporarily unavailable"
in
body
.
get
(
"error"
,
""
)
or
"All workers are busy"
in
body
.
get
(
"error"
,
""
error_msg
=
body
.
get
(
"message"
,
""
)
assert
(
"Service temporarily unavailable"
in
error_msg
or
"All workers are busy"
in
error_msg
),
f
"Expected service overload error message, got:
{
body
}
"
return
True
else
:
...
...
tests/router/test_router_e2e_with_mockers.py
View file @
db79f324
...
...
@@ -762,7 +762,6 @@ def test_mocker_two_kv_router(
)
@
pytest
.
mark
.
skip
(
reason
=
"Flaky, temporarily disabled"
)
@
pytest
.
mark
.
parametrize
(
"durable_kv_events"
,
[
False
],
ids
=
[
"nondurable"
],
indirect
=
True
)
# Use NATS Core (local indexer)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment