Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
941ad640
Unverified
Commit
941ad640
authored
Jan 28, 2026
by
luc-hiverge
Committed by
GitHub
Jan 28, 2026
Browse files
fix: emit first token creation signal after sleeping. (#5681)
Signed-off-by: Luc Grosheintz <luc@hiverge.ai>
parent
d1697dc3
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 20 additions and 19 deletions
+20
-19
lib/llm/src/mocker/scheduler.rs
lib/llm/src/mocker/scheduler.rs
+20
-19
No files found.
lib/llm/src/mocker/scheduler.rs
View file @
941ad640
...
...
@@ -297,42 +297,33 @@ impl Scheduler {
break
;
}
// Start timing for this forward pass (schedule + simulate)
let
iteration_start
=
std
::
time
::
Instant
::
now
();
// 2. Schedule waiting requests (once per iteration)
try_schedule
(
&
mut
state
,
&
kv_manager
,
&
mut
hit_rates
,
&
args
);
// 3. Simulate prefill + decode
let
prefill_time
=
simulate_prefill
(
simulate_prefill
(
&
mut
state
,
&
mut
kv_manager
,
&
args
.perf_model
,
args
.worker_type
,
);
let
decode_time
=
simulate_decode
(
args
.speedup_ratio
,
)
.await
;
simulate_decode
(
&
mut
state
,
&
mut
kv_manager
,
&
output_tx
,
&
args
.perf_model
,
args
.block_size
,
);
let
total_time
=
prefill_time
+
decode_time
;
args
.speedup_ratio
,
)
.await
;
// 4. Send metrics once per forward pass (after all prefill and decode processing)
let
_
=
metrics_tx
.send
(
MockerMetrics
{
dp_rank
,
active_decode_blocks
:
kv_manager
.num_active_blocks
()
as
u64
,
});
// 5. Sleep to maintain target iteration timing
let
target_duration
=
Duration
::
from_secs_f64
(
total_time
.as_secs_f64
()
/
args
.speedup_ratio
);
let
elapsed
=
iteration_start
.elapsed
();
if
elapsed
<
target_duration
{
tokio
::
time
::
sleep
(
target_duration
-
elapsed
)
.await
;
}
}
});
...
...
@@ -392,12 +383,14 @@ async fn receive_requests(
/// Simulate prefill phase for all pending prefill requests.
/// Returns the total prefill compute time.
fn
simulate_prefill
(
async
fn
simulate_prefill
(
state
:
&
mut
SchedulerState
,
kv_manager
:
&
mut
KvManager
,
perf_model
:
&
PerfModel
,
worker_type
:
WorkerType
,
speedup_ratio
:
f64
,
)
->
Duration
{
let
start_time
=
tokio
::
time
::
Instant
::
now
();
let
mut
total_time
=
Duration
::
ZERO
;
while
let
Some
((
prefill_compute
,
maybe_creation_signal
,
is_full_prefill
))
=
...
...
@@ -422,18 +415,23 @@ fn simulate_prefill(
}
}
let
deadline
=
start_time
+
Duration
::
from_secs_f64
(
total_time
.as_secs_f64
()
/
speedup_ratio
);
tokio
::
time
::
sleep_until
(
deadline
)
.await
;
total_time
}
/// Simulate decode phase for all active decode requests.
/// Returns the total decode compute time.
fn
simulate_decode
(
async
fn
simulate_decode
(
state
:
&
mut
SchedulerState
,
kv_manager
:
&
mut
KvManager
,
output_tx
:
&
Option
<
mpsc
::
UnboundedSender
<
OutputSignal
>>
,
perf_model
:
&
PerfModel
,
block_size
:
usize
,
speedup_ratio
:
f64
,
)
->
Duration
{
let
start_time
=
tokio
::
time
::
Instant
::
now
();
// Compute decode timing
let
active_kv_tokens
=
kv_manager
.num_active_blocks
()
*
block_size
;
// Compute average context length across all active decode requests
...
...
@@ -496,6 +494,9 @@ fn simulate_decode(
}
}
let
deadline
=
start_time
+
Duration
::
from_secs_f64
(
total_time
.as_secs_f64
()
/
speedup_ratio
);
tokio
::
time
::
sleep_until
(
deadline
)
.await
;
total_time
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment