Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f1e8ea6e
Unverified
Commit
f1e8ea6e
authored
Mar 05, 2026
by
Karen Chung
Committed by
GitHub
Mar 05, 2026
Browse files
fix: Enforce min_endpoint flag in Planner (#6637)
parent
0853129a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
3 deletions
+23
-3
components/src/dynamo/planner/utils/agg_planner.py
components/src/dynamo/planner/utils/agg_planner.py
+2
-2
components/src/dynamo/planner/utils/decode_planner.py
components/src/dynamo/planner/utils/decode_planner.py
+7
-0
components/src/dynamo/planner/utils/disagg_planner.py
components/src/dynamo/planner/utils/disagg_planner.py
+4
-0
components/src/dynamo/planner/utils/planner_core.py
components/src/dynamo/planner/utils/planner_core.py
+3
-1
components/src/dynamo/planner/utils/prefill_planner.py
components/src/dynamo/planner/utils/prefill_planner.py
+7
-0
No files found.
components/src/dynamo/planner/utils/agg_planner.py
View file @
f1e8ea6e
...
...
@@ -210,7 +210,7 @@ class AggPlanner:
return
"up"
# Scale down: ALL workers below boundary
if
num_workers
>
1
:
if
num_workers
>
self
.
config
.
min_endpoint
:
sensitivity
=
self
.
config
.
load_scaling_down_sensitivity
/
100.0
boundary
=
target
*
(
num_workers
-
1
)
/
num_workers
*
sensitivity
if
all
(
...
...
@@ -253,7 +253,7 @@ class AggPlanner:
# Scale down: ALL workers below boundary
# TODO: should we strictly enforce all workers below boundary?
# how about user-configurable percentage?
if
num_workers
>
1
:
if
num_workers
>
self
.
config
.
min_endpoint
:
sensitivity
=
self
.
config
.
load_scaling_down_sensitivity
/
100.0
boundary
=
x_sla
*
(
num_workers
-
1
)
/
num_workers
*
sensitivity
if
all
(
...
...
components/src/dynamo/planner/utils/decode_planner.py
View file @
f1e8ea6e
...
...
@@ -69,6 +69,13 @@ class DecodePlanner(BasePlanner):
m
.
get
(
"active_decode_blocks"
,
0.0
)
<
boundary
for
m
in
recent
.
values
()
)
if
all_below
:
if
num_workers
-
1
<
self
.
config
.
min_endpoint
:
logger
.
info
(
f
"Load-based decode: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"but cannot scale down below min_endpoint (
{
self
.
config
.
min_endpoint
}
); "
f
"maintaining
{
num_workers
}
decode workers"
)
return
num_workers
logger
.
info
(
f
"Load-based decode: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"scaling down to
{
num_workers
-
1
}
"
...
...
components/src/dynamo/planner/utils/disagg_planner.py
View file @
f1e8ea6e
...
...
@@ -226,6 +226,10 @@ class DisaggPlanner:
final_p
=
max
(
final_p
,
self
.
shared_state
.
throughput_lower_bound_p
)
final_d
=
max
(
final_d
,
self
.
shared_state
.
throughput_lower_bound_d
)
# Enforce minimum endpoints
final_p
=
max
(
final_p
,
self
.
config
.
min_endpoint
)
final_d
=
max
(
final_d
,
self
.
config
.
min_endpoint
)
# Apply GPU budget
final_p
,
final_d
=
_apply_global_gpu_budget
(
final_p
,
final_d
,
self
.
config
)
...
...
components/src/dynamo/planner/utils/planner_core.py
View file @
f1e8ea6e
...
...
@@ -746,7 +746,9 @@ class BasePlanner:
def
apply_component_budget
(
self
,
desired_replicas
:
int
)
->
int
:
return
_apply_component_gpu_budget
(
desired_replicas
,
self
.
_engine_num_gpu
(),
self
.
config
max
(
desired_replicas
,
self
.
config
.
min_endpoint
),
self
.
_engine_num_gpu
(),
self
.
config
,
)
async
def
_apply_scaling
(
self
,
desired_replicas
:
int
)
->
None
:
...
...
components/src/dynamo/planner/utils/prefill_planner.py
View file @
f1e8ea6e
...
...
@@ -78,6 +78,13 @@ class PrefillPlanner(BasePlanner):
m
.
get
(
"active_prefill_tokens"
,
0.0
)
<
boundary
for
m
in
recent
.
values
()
)
if
all_below
:
if
num_workers
-
1
<
self
.
config
.
min_endpoint
:
logger
.
info
(
f
"Load-based prefill: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"but cannot scale down below min_endpoint (
{
self
.
config
.
min_endpoint
}
); "
f
"maintaining
{
num_workers
}
prefill workers"
)
return
num_workers
logger
.
info
(
f
"Load-based prefill: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"scaling down to
{
num_workers
-
1
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment