chore: Remove PreprocessedRequest alias BackendInput (#1307)

It was confusing to have two names for one type. This tidy-up, which started in #1064, is now complete.

chore: Remove PreprocessedRequest alias BackendInput (#1307)
It was confusing to have two names for one type. This tidy-up, which started in #1064, is now complete.
3f6a7472 · Graham King · GitHub · 859944f4 · 3f6a7472 · 3f6a7472
Unverified Commit 3f6a7472 authored Jun 02, 2025 by Graham King Committed by GitHub Jun 02, 2025
11 changed files
--- a/launch/dynamo-run/src/input/common.rs
+++ b/launch/dynamo-run/src/input/common.rs
@@ -9,7 +9,7 @@ use dynamo_llm::{
    engines::StreamingEngineAdapter,
    model_card::ModelDeploymentCard,
    preprocessor::OpenAIPreprocessor,
-    protocols::common::llm_backend::{BackendInput, BackendOutput},
+    protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
    types::{
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
@@ -113,7 +113,7 @@ where
    OpenAIPreprocessor: Operator<
        Context<Req>,
        Pin<Box<dyn AsyncEngineStream<Annotated<Resp>>>>,
-        Context<BackendInput>,
+        Context<PreprocessedRequest>,
        Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
    >,
 {

--- a/launch/dynamo-run/src/input/endpoint.rs
+++ b/launch/dynamo-run/src/input/endpoint.rs
@@ -19,7 +19,7 @@ use dynamo_llm::{
    backend::Backend,
    engines::StreamingEngineAdapter,
    model_type::ModelType,
-    preprocessor::{BackendInput, BackendOutput},
+    preprocessor::{BackendOutput, PreprocessedRequest},
    types::{
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
@@ -71,8 +71,10 @@ pub async fn run(
            mut model,
        } => {
            // Pre-processing is done ingress-side, so it should be already done.
-            let frontend =
-                SegmentSource::<SingleIn<BackendInput>, ManyOut<Annotated<BackendOutput>>>::new();
+            let frontend = SegmentSource::<
+                SingleIn<PreprocessedRequest>,
+                ManyOut<Annotated<BackendOutput>>,
+            >::new();
            let backend = Backend::from_mdc(model.card().clone())
                .await?
                .into_operator();

--- a/lib/bindings/python/rust/llm/backend.rs
+++ b/lib/bindings/python/rust/llm/backend.rs
@@ -16,7 +16,7 @@
 use super::*;
 use crate::llm::model_card::ModelDeploymentCard;

-use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput};
+use llm_rs::protocols::common::llm_backend::{BackendOutput, PreprocessedRequest};
 use llm_rs::types::Annotated;

 use dynamo_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
@@ -44,8 +44,10 @@ impl Backend {
    }

    fn start<'p>(&self, py: Python<'p>, generator: PyObject) -> PyResult<Bound<'p, PyAny>> {
-        let frontend =
-            ServiceFrontend::<SingleIn<BackendInput>, ManyOut<Annotated<BackendOutput>>>::new();
+        let frontend = ServiceFrontend::<
+            SingleIn<PreprocessedRequest>,
+            ManyOut<Annotated<BackendOutput>>,
+        >::new();

        let backend = self.inner.into_operator();
        let engine = Arc::new(PythonAsyncEngine::new(

--- a/lib/bindings/python/rust/llm/preprocessor.rs
+++ b/lib/bindings/python/rust/llm/preprocessor.rs
@@ -18,7 +18,7 @@ use crate::llm::model_card::ModelDeploymentCard;

 use llm_rs::{
    preprocessor::OpenAIPreprocessor,
-    protocols::common::llm_backend::{BackendInput, BackendOutput},
+    protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
    types::{
        openai::chat_completions::{
            NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
@@ -60,7 +60,7 @@ impl OAIChatPreprocessor {
        >::new();

        let network =
-            SegmentSink::<SingleIn<BackendInput>, ManyOut<Annotated<BackendOutput>>>::new();
+            SegmentSink::<SingleIn<PreprocessedRequest>, ManyOut<Annotated<BackendOutput>>>::new();

        let preprocessor = self.inner.into_operator();
        let pipeline = frontend
@@ -77,7 +77,7 @@ impl OAIChatPreprocessor {
        let endpoint = Arc::new(self.next.inner.clone());
        pyo3_async_runtimes::tokio::future_into_py(py, async move {
            let client = endpoint.client().await.map_err(to_pyerr)?;
-            let router = PushRouter::<BackendInput, Annotated<BackendOutput>>::from_client(
+            let router = PushRouter::<PreprocessedRequest, Annotated<BackendOutput>>::from_client(
                client,
                Default::default(),
            )

--- a/lib/engines/llamacpp/src/lib.rs
+++ b/lib/engines/llamacpp/src/lib.rs
@@ -23,7 +23,7 @@ use llama_cpp_2::{
    LogOptions,
 };

-use dynamo_llm::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
+use dynamo_llm::protocols::common::llm_backend::LLMEngineOutput;
 use dynamo_llm::protocols::common::preprocessor::PreprocessedRequest;
 use dynamo_llm::{backend::ExecutionContext, local_model::LocalModel};

@@ -119,12 +119,12 @@ fn load_model(backend: &LlamaBackend, model_path: &Path) -> Result<LlamaModel> {
 }

 #[async_trait]
-impl AsyncEngine<SingleIn<BackendInput>, ManyOut<Annotated<LLMEngineOutput>>, Error>
+impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutput>>, Error>
    for LlamacppEngine
 {
    async fn generate(
        &self,
-        request: SingleIn<BackendInput>,
+        request: SingleIn<PreprocessedRequest>,
    ) -> Result<ManyOut<Annotated<LLMEngineOutput>>, Error> {
        let (request, context) = request.into_parts();
        let ctx = context.context();

--- a/lib/llm/src/backend.rs
+++ b/lib/llm/src/backend.rs
@@ -44,7 +44,7 @@ use dynamo_runtime::{

 use crate::protocols::{
    common::{
-        llm_backend::{BackendInput, BackendOutput, FinishReason, LLMEngineOutput},
+        llm_backend::{BackendOutput, FinishReason, LLMEngineOutput, PreprocessedRequest},
        StopConditions,
    },
    TokenIdType,
@@ -56,7 +56,7 @@ use tokenizers::Tokenizer as HfTokenizer;
 pub type ExecutionOutputStream = Annotated<LLMEngineOutput>;

 /// Context for executing LLM inference, engine consumes backend input and produces execution output stream
-pub type ExecutionContext = ServerStreamingEngine<BackendInput, ExecutionOutputStream>;
+pub type ExecutionContext = ServerStreamingEngine<PreprocessedRequest, ExecutionOutputStream>;

 /// Backend handles resource management and orchestrates LLM execution
 #[allow(dead_code)]
@@ -121,16 +121,16 @@ impl Backend {
 #[async_trait]
 impl
    Operator<
-        SingleIn<BackendInput>,
+        SingleIn<PreprocessedRequest>,
        ManyOut<Annotated<BackendOutput>>,
-        SingleIn<BackendInput>,
+        SingleIn<PreprocessedRequest>,
        ManyOut<Annotated<LLMEngineOutput>>,
    > for Backend
 {
    async fn generate(
        &self,
-        request: SingleIn<BackendInput>,
-        next: ServerStreamingEngine<BackendInput, Annotated<LLMEngineOutput>>,
+        request: SingleIn<PreprocessedRequest>,
+        next: ServerStreamingEngine<PreprocessedRequest, Annotated<LLMEngineOutput>>,
    ) -> Result<ManyOut<Annotated<BackendOutput>>> {
        let stop_conditions = request.stop_conditions.clone();
        let next_stream = next.generate(request).await?;

--- a/lib/llm/src/discovery/watcher.rs
+++ b/lib/llm/src/discovery/watcher.rs
@@ -20,7 +20,7 @@ use crate::{
    backend::Backend,
    kv_router::KvPushRouter,
    model_type::ModelType,
-    preprocessor::{BackendInput, OpenAIPreprocessor},
+    preprocessor::{OpenAIPreprocessor, PreprocessedRequest},
    protocols::common::llm_backend::LLMEngineOutput,
    protocols::openai::chat_completions::{
        NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
@@ -196,7 +196,8 @@ impl ModelWatcher {
                >::new();
                let preprocessor = OpenAIPreprocessor::new(card.clone()).await?.into_operator();
                let backend = Backend::from_mdc(card.clone()).await?.into_operator();
-                let router = PushRouter::<BackendInput, Annotated<LLMEngineOutput>>::from_client(
+                let router =
+                    PushRouter::<PreprocessedRequest, Annotated<LLMEngineOutput>>::from_client(
                        client.clone(),
                        self.router_mode,
                    )
@@ -231,7 +232,8 @@ impl ModelWatcher {
                >::new();
                let preprocessor = OpenAIPreprocessor::new(card.clone()).await?.into_operator();
                let backend = Backend::from_mdc(card.clone()).await?.into_operator();
-                let router = PushRouter::<BackendInput, Annotated<LLMEngineOutput>>::from_client(
+                let router =
+                    PushRouter::<PreprocessedRequest, Annotated<LLMEngineOutput>>::from_client(
                        client,
                        self.router_mode,
                    )

--- a/lib/llm/src/engines.rs
+++ b/lib/llm/src/engines.rs
@@ -26,7 +26,7 @@ use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
 use dynamo_runtime::protocols::annotated::Annotated;

 use crate::backend::ExecutionContext;
-use crate::preprocessor::BackendInput;
+use crate::preprocessor::PreprocessedRequest;
 use crate::protocols::common::llm_backend::LLMEngineOutput;
 use crate::protocols::openai::{
    chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
@@ -86,12 +86,12 @@ pub fn make_engine_core() -> ExecutionContext {
 }

 #[async_trait]
-impl AsyncEngine<SingleIn<BackendInput>, ManyOut<Annotated<LLMEngineOutput>>, Error>
+impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutput>>, Error>
    for EchoEngineCore
 {
    async fn generate(
        &self,
-        incoming_request: SingleIn<BackendInput>,
+        incoming_request: SingleIn<PreprocessedRequest>,
    ) -> Result<ManyOut<Annotated<LLMEngineOutput>>, Error> {
        let (request, context) = incoming_request.into_parts();
        let ctx = context.context();

--- a/lib/llm/src/kv_router.rs
+++ b/lib/llm/src/kv_router.rs
@@ -31,7 +31,7 @@ use crate::{
        scheduler::{KvScheduler, KvSchedulerError, SchedulingRequest},
        scoring::ProcessedEndpoints,
    },
-    preprocessor::BackendInput,
+    preprocessor::PreprocessedRequest,
    protocols::common::llm_backend::LLMEngineOutput,
    tokens::TokenBlockSequence,
 };
@@ -173,13 +173,13 @@ impl AsyncEngine<SingleIn<RouterRequest>, ManyOut<Annotated<RouterResponse>>, Er
 }

 pub struct KvPushRouter {
-    inner: PushRouter<BackendInput, Annotated<LLMEngineOutput>>,
+    inner: PushRouter<PreprocessedRequest, Annotated<LLMEngineOutput>>,
    chooser: Arc<KvRouter>,
 }

 impl KvPushRouter {
    pub fn new(
-        inner: PushRouter<BackendInput, Annotated<LLMEngineOutput>>,
+        inner: PushRouter<PreprocessedRequest, Annotated<LLMEngineOutput>>,
        chooser: Arc<KvRouter>,
    ) -> Self {
        KvPushRouter { inner, chooser }
@@ -187,12 +187,12 @@ impl KvPushRouter {
 }

 #[async_trait]
-impl AsyncEngine<SingleIn<BackendInput>, ManyOut<Annotated<LLMEngineOutput>>, Error>
+impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutput>>, Error>
    for KvPushRouter
 {
    async fn generate(
        &self,
-        request: SingleIn<BackendInput>,
+        request: SingleIn<PreprocessedRequest>,
    ) -> Result<ManyOut<Annotated<LLMEngineOutput>>, Error> {
        match self.inner.client.instance_source.as_ref() {
            InstanceSource::Static => self.inner.r#static(request).await,

--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -55,7 +55,7 @@ use crate::tokenizers::{traits::Tokenizer, HuggingFaceTokenizer};

 use crate::preprocessor::prompt::PromptFormatter;

-pub use crate::protocols::common::llm_backend::{BackendInput, BackendOutput};
+pub use crate::protocols::common::llm_backend::{BackendOutput, PreprocessedRequest};

 pub const ANNOTATION_FORMATTED_PROMPT: &str = "formatted_prompt";
 pub const ANNOTATION_TOKEN_IDS: &str = "token_ids";
@@ -121,9 +121,9 @@ impl OpenAIPreprocessor {
    >(
        &self,
        request: &R,
-    ) -> Result<(BackendInput, HashMap<String, String>)> {
+    ) -> Result<(PreprocessedRequest, HashMap<String, String>)> {
        let mut annotations = HashMap::new();
-        let mut builder = BackendInput::builder();
+        let mut builder = PreprocessedRequest::builder();

        let use_raw_prompt = request
            .nvext()
@@ -266,7 +266,7 @@ impl
    Operator<
        SingleIn<NvCreateChatCompletionRequest>,
        ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
-        SingleIn<BackendInput>,
+        SingleIn<PreprocessedRequest>,
        ManyOut<Annotated<BackendOutput>>,
    > for OpenAIPreprocessor
 {
@@ -274,7 +274,11 @@ impl
        &self,
        request: SingleIn<NvCreateChatCompletionRequest>,
        next: Arc<
-            dyn AsyncEngine<SingleIn<BackendInput>, ManyOut<Annotated<BackendOutput>>, Error>,
+            dyn AsyncEngine<
+                SingleIn<PreprocessedRequest>,
+                ManyOut<Annotated<BackendOutput>>,
+                Error,
+            >,
        >,
    ) -> Result<ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>, Error> {
        // unpack the request
@@ -320,7 +324,7 @@ impl
    Operator<
        SingleIn<CompletionRequest>,
        ManyOut<Annotated<CompletionResponse>>,
-        SingleIn<BackendInput>,
+        SingleIn<PreprocessedRequest>,
        ManyOut<Annotated<BackendOutput>>,
    > for OpenAIPreprocessor
 {
@@ -328,7 +332,11 @@ impl
        &self,
        request: SingleIn<CompletionRequest>,
        next: Arc<
-            dyn AsyncEngine<SingleIn<BackendInput>, ManyOut<Annotated<BackendOutput>>, Error>,
+            dyn AsyncEngine<
+                SingleIn<PreprocessedRequest>,
+                ManyOut<Annotated<BackendOutput>>,
+                Error,
+            >,
        >,
    ) -> Result<ManyOut<Annotated<CompletionResponse>>, Error> {
        // unpack the request

--- a/lib/llm/src/protocols/common/llm_backend.rs
+++ b/lib/llm/src/protocols/common/llm_backend.rs
@@ -20,7 +20,6 @@ use crate::protocols::TokenIdType;
 pub type TokenType = Option<String>;
 pub type LogProbs = Vec<f64>;

-pub use super::preprocessor::PreprocessedRequest as BackendInput; // TODO stop renaming this
 pub use super::preprocessor::PreprocessedRequest;
 pub use super::FinishReason;