Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d5f425ab
Unverified
Commit
d5f425ab
authored
Dec 09, 2025
by
Graham King
Committed by
GitHub
Dec 09, 2025
Browse files
chore(pipeline): Move migration outside of backend (#4823)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
7c15166d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
33 deletions
+29
-33
lib/llm/src/entrypoint/input/common.rs
lib/llm/src/entrypoint/input/common.rs
+2
-2
lib/llm/src/migration.rs
lib/llm/src/migration.rs
+27
-31
No files found.
lib/llm/src/entrypoint/input/common.rs
View file @
d5f425ab
...
...
@@ -271,13 +271,13 @@ where
// Link with prefill chooser including backward edge for response flow
let
engine
=
frontend
.link
(
preprocessor_op
.forward_edge
())
?
.link
(
backend
.forward_edge
())
?
.link
(
migration
.forward_edge
())
?
.link
(
backend
.forward_edge
())
?
.link
(
prefill_op
.forward_edge
())
?
.link
(
service_backend
)
?
.link
(
prefill_op
.backward_edge
())
?
.link
(
migration
.backward_edge
())
?
.link
(
backend
.backward_edge
())
?
.link
(
migration
.backward_edge
())
?
.link
(
preprocessor_op
.backward_edge
())
?
.link
(
frontend
)
?
;
...
...
lib/llm/src/migration.rs
View file @
d5f425ab
...
...
@@ -11,8 +11,8 @@ use async_nats::client::{
};
use
crate
::{
model_card
::
ModelDeploymentCard
,
protocols
::
common
::
llm_backend
::
{
LLMEngineOutput
,
PreprocessedRequest
}
,
model_card
::
ModelDeploymentCard
,
preprocessor
::
BackendOutput
,
protocols
::
common
::
llm_backend
::
PreprocessedRequest
,
};
use
dynamo_runtime
::{
...
...
@@ -44,16 +44,16 @@ impl Migration {
impl
Operator
<
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
LLMEngine
Output
>>
,
ManyOut
<
Annotated
<
Backend
Output
>>
,
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
LLMEngine
Output
>>
,
ManyOut
<
Annotated
<
Backend
Output
>>
,
>
for
Migration
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
PreprocessedRequest
>
,
next
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
,
)
->
Result
<
ManyOut
<
Annotated
<
LLMEngine
Output
>>>
{
next
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
,
)
->
Result
<
ManyOut
<
Annotated
<
Backend
Output
>>>
{
let
(
preprocessed_request
,
context
)
=
request
.transfer
(());
let
engine_ctx
=
context
.context
();
let
engine_ctx_
=
engine_ctx
.clone
();
...
...
@@ -73,8 +73,8 @@ impl
struct
RetryManager
{
context
:
Arc
<
dyn
AsyncEngineContext
>
,
request
:
PreprocessedRequest
,
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
,
next_stream
:
Option
<
ManyOut
<
Annotated
<
LLMEngine
Output
>>>
,
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
,
next_stream
:
Option
<
ManyOut
<
Annotated
<
Backend
Output
>>>
,
retries_left
:
u32
,
}
...
...
@@ -82,7 +82,7 @@ impl RetryManager {
pub
async
fn
build
(
context
:
Arc
<
dyn
AsyncEngineContext
>
,
preprocessed_request
:
PreprocessedRequest
,
next
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
,
next
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
,
retries_left
:
u32
,
)
->
Result
<
Self
>
{
let
mut
slf
=
Self
{
...
...
@@ -96,7 +96,7 @@ impl RetryManager {
Ok
(
slf
)
}
pub
async
fn
next
(
&
mut
self
)
->
Option
<
Annotated
<
LLMEngine
Output
>>
{
pub
async
fn
next
(
&
mut
self
)
->
Option
<
Annotated
<
Backend
Output
>>
{
loop
{
let
response_stream
=
match
self
.next_stream
.as_mut
()
{
Some
(
stream
)
=>
stream
,
...
...
@@ -128,7 +128,7 @@ impl RetryManager {
}
async
fn
new_stream
(
&
mut
self
)
->
Result
<
()
>
{
let
mut
response_stream
:
Option
<
Result
<
ManyOut
<
Annotated
<
LLMEngine
Output
>>>>
=
None
;
let
mut
response_stream
:
Option
<
Result
<
ManyOut
<
Annotated
<
Backend
Output
>>>>
=
None
;
while
self
.retries_left
>
0
{
self
.retries_left
-=
1
;
let
request
=
Context
::
with_id
(
self
.request
.clone
(),
self
.context
.id
()
.to_string
());
...
...
@@ -162,7 +162,7 @@ impl RetryManager {
}
}
fn
track_response
(
&
mut
self
,
response
:
&
Annotated
<
LLMEngine
Output
>
)
{
fn
track_response
(
&
mut
self
,
response
:
&
Annotated
<
Backend
Output
>
)
{
if
self
.retries_left
==
0
{
return
;
}
...
...
@@ -207,18 +207,17 @@ mod tests {
}
// Helper to create mock LLM engine output
fn
create_mock_output
(
token_id
:
u32
)
->
Annotated
<
LLMEngine
Output
>
{
Annotated
::
from_data
(
LLMEngine
Output
{
fn
create_mock_output
(
token_id
:
u32
)
->
Annotated
<
Backend
Output
>
{
Annotated
::
from_data
(
Backend
Output
{
token_ids
:
vec!
[
token_id
],
tokens
:
None
,
text
:
Some
(
format!
(
"token_{
}"
,
token_id
)),
tokens
:
vec!
[]
,
text
:
Some
(
format!
(
"token_{token_id
}"
)),
cum_log_probs
:
None
,
log_probs
:
None
,
top_logprobs
:
None
,
finish_reason
:
None
,
index
:
None
,
disaggregated_params
:
None
,
extra_args
:
None
,
completion_usage
:
None
,
})
}
...
...
@@ -267,16 +266,13 @@ mod tests {
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
LLMEngineOutput
>>
,
anyhow
::
Error
,
>
for
MockEngine
AsyncEngine
<
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
BackendOutput
>>
,
anyhow
::
Error
>
for
MockEngine
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
PreprocessedRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
LLMEngine
Output
>>>
{
)
->
Result
<
ManyOut
<
Annotated
<
Backend
Output
>>>
{
let
call_num
=
self
.call_count
.fetch_add
(
1
,
Ordering
::
SeqCst
);
let
(
preprocessed_request
,
context
)
=
request
.transfer
(());
...
...
@@ -457,7 +453,7 @@ mod tests {
&
self
,
start
:
usize
,
end
:
usize
,
)
->
Result
<
ManyOut
<
Annotated
<
LLMEngine
Output
>>>
{
)
->
Result
<
ManyOut
<
Annotated
<
Backend
Output
>>>
{
let
(
tx
,
rx
)
=
mpsc
::
channel
(
1
);
let
token_offset
=
self
.token_offset
;
...
...
@@ -494,7 +490,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
@@ -533,7 +529,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
@@ -573,7 +569,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
@@ -613,7 +609,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
// Should fail to build due to initial stream creation failure after exhausting all 3 retries
...
...
@@ -641,7 +637,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
@@ -690,7 +686,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
@@ -739,7 +735,7 @@ mod tests {
100
,
context_id
.clone
(),
));
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
LLMEngine
Output
>>
=
let
next_generate
:
ServerStreamingEngine
<
PreprocessedRequest
,
Annotated
<
Backend
Output
>>
=
mock_engine
;
let
ctx
=
Arc
::
new
(
Controller
::
new
(
context_id
.clone
()));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment