Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f1e8ea6e
"docs/reference/feature-matrix.md" did not exist on "4ed8584a4ae4363858df400611e17a8e81403319"
Unverified
Commit
f1e8ea6e
authored
Mar 05, 2026
by
Karen Chung
Committed by
GitHub
Mar 05, 2026
Browse files
fix: Enforce min_endpoint flag in Planner (#6637)
parent
0853129a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
3 deletions
+23
-3
components/src/dynamo/planner/utils/agg_planner.py
components/src/dynamo/planner/utils/agg_planner.py
+2
-2
components/src/dynamo/planner/utils/decode_planner.py
components/src/dynamo/planner/utils/decode_planner.py
+7
-0
components/src/dynamo/planner/utils/disagg_planner.py
components/src/dynamo/planner/utils/disagg_planner.py
+4
-0
components/src/dynamo/planner/utils/planner_core.py
components/src/dynamo/planner/utils/planner_core.py
+3
-1
components/src/dynamo/planner/utils/prefill_planner.py
components/src/dynamo/planner/utils/prefill_planner.py
+7
-0
No files found.
components/src/dynamo/planner/utils/agg_planner.py
View file @
f1e8ea6e
...
...
@@ -210,7 +210,7 @@ class AggPlanner:
return
"up"
# Scale down: ALL workers below boundary
if
num_workers
>
1
:
if
num_workers
>
self
.
config
.
min_endpoint
:
sensitivity
=
self
.
config
.
load_scaling_down_sensitivity
/
100.0
boundary
=
target
*
(
num_workers
-
1
)
/
num_workers
*
sensitivity
if
all
(
...
...
@@ -253,7 +253,7 @@ class AggPlanner:
# Scale down: ALL workers below boundary
# TODO: should we strictly enforce all workers below boundary?
# how about user-configurable percentage?
if
num_workers
>
1
:
if
num_workers
>
self
.
config
.
min_endpoint
:
sensitivity
=
self
.
config
.
load_scaling_down_sensitivity
/
100.0
boundary
=
x_sla
*
(
num_workers
-
1
)
/
num_workers
*
sensitivity
if
all
(
...
...
components/src/dynamo/planner/utils/decode_planner.py
View file @
f1e8ea6e
...
...
@@ -69,6 +69,13 @@ class DecodePlanner(BasePlanner):
m
.
get
(
"active_decode_blocks"
,
0.0
)
<
boundary
for
m
in
recent
.
values
()
)
if
all_below
:
if
num_workers
-
1
<
self
.
config
.
min_endpoint
:
logger
.
info
(
f
"Load-based decode: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"but cannot scale down below min_endpoint (
{
self
.
config
.
min_endpoint
}
); "
f
"maintaining
{
num_workers
}
decode workers"
)
return
num_workers
logger
.
info
(
f
"Load-based decode: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"scaling down to
{
num_workers
-
1
}
"
...
...
components/src/dynamo/planner/utils/disagg_planner.py
View file @
f1e8ea6e
...
...
@@ -226,6 +226,10 @@ class DisaggPlanner:
final_p
=
max
(
final_p
,
self
.
shared_state
.
throughput_lower_bound_p
)
final_d
=
max
(
final_d
,
self
.
shared_state
.
throughput_lower_bound_d
)
# Enforce minimum endpoints
final_p
=
max
(
final_p
,
self
.
config
.
min_endpoint
)
final_d
=
max
(
final_d
,
self
.
config
.
min_endpoint
)
# Apply GPU budget
final_p
,
final_d
=
_apply_global_gpu_budget
(
final_p
,
final_d
,
self
.
config
)
...
...
components/src/dynamo/planner/utils/planner_core.py
View file @
f1e8ea6e
...
...
@@ -746,7 +746,9 @@ class BasePlanner:
def
apply_component_budget
(
self
,
desired_replicas
:
int
)
->
int
:
return
_apply_component_gpu_budget
(
desired_replicas
,
self
.
_engine_num_gpu
(),
self
.
config
max
(
desired_replicas
,
self
.
config
.
min_endpoint
),
self
.
_engine_num_gpu
(),
self
.
config
,
)
async
def
_apply_scaling
(
self
,
desired_replicas
:
int
)
->
None
:
...
...
components/src/dynamo/planner/utils/prefill_planner.py
View file @
f1e8ea6e
...
...
@@ -78,6 +78,13 @@ class PrefillPlanner(BasePlanner):
m
.
get
(
"active_prefill_tokens"
,
0.0
)
<
boundary
for
m
in
recent
.
values
()
)
if
all_below
:
if
num_workers
-
1
<
self
.
config
.
min_endpoint
:
logger
.
info
(
f
"Load-based prefill: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"but cannot scale down below min_endpoint (
{
self
.
config
.
min_endpoint
}
); "
f
"maintaining
{
num_workers
}
prefill workers"
)
return
num_workers
logger
.
info
(
f
"Load-based prefill: ALL workers below boundary (
{
boundary
:.
1
f
}
), "
f
"scaling down to
{
num_workers
-
1
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment