refactor(llm): Rename EngineConfig::Static to InProcess (#4585)

Signed-off-by: Graham King <grahamk@nvidia.com>

refactor(llm): Rename EngineConfig::Static to InProcess (#4585)
Signed-off-by: Graham King <grahamk@nvidia.com>
0fc5273c · Graham King · GitHub · a77558d4 · 0fc5273c · 0fc5273c
Unverified Commit 0fc5273c authored Nov 25, 2025 by Graham King Committed by GitHub Nov 25, 2025
8 changed files
--- a/launch/dynamo-run/src/lib.rs
+++ b/launch/dynamo-run/src/lib.rs
@@ -148,12 +148,12 @@ async fn engine_for(
            // Auto-discover backends
            Ok(EngineConfig::Dynamic(Box::new(local_model)))
        }
-        Output::Echo => Ok(EngineConfig::StaticFull {
+        Output::Echo => Ok(EngineConfig::InProcessText {
            model: Box::new(local_model),
            engine: dynamo_llm::engines::make_echo_engine(),
        }),
        #[cfg(feature = "mistralrs")]
-        Output::MistralRs => Ok(EngineConfig::StaticFull {
+        Output::MistralRs => Ok(EngineConfig::InProcessText {
            engine: dynamo_engine_mistralrs::make_engine(&local_model).await?,
            model: Box::new(local_model),
        }),
@@ -164,7 +164,7 @@ async fn engine_for(
            let engine =
                dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?;

-            Ok(EngineConfig::StaticCore {
+            Ok(EngineConfig::InProcessTokens {
                engine,
                model: Box::new(local_model),
                is_prefill: false,

--- a/lib/bindings/python/rust/llm/entrypoint.rs
+++ b/lib/bindings/python/rust/llm/entrypoint.rs
@@ -253,7 +253,7 @@ async fn select_engine(
    let inner = match args.engine_type {
        EngineType::Echo => {
            // There is no validation for the echo engine
-            RsEngineConfig::StaticFull {
+            RsEngineConfig::InProcessText {
                model: Box::new(local_model),
                engine: dynamo_llm::engines::make_echo_engine(),
            }
@@ -284,7 +284,7 @@ async fn select_engine(
            )
            .await?;

-            RsEngineConfig::StaticCore {
+            RsEngineConfig::InProcessTokens {
                engine,
                model: Box::new(local_model),
                is_prefill: args.is_prefill,

--- a/lib/llm/src/entrypoint.rs
+++ b/lib/llm/src/entrypoint.rs
@@ -51,14 +51,14 @@ pub enum EngineConfig {
    /// Remote networked engines that we discover via etcd
    Dynamic(Box<LocalModel>),

-    /// A Full service engine does it's own tokenization and prompt formatting.
-    StaticFull {
+    /// A Text engine receives text and does its own tokenization and prompt formatting.
+    InProcessText {
        engine: Arc<dyn StreamingEngine>,
        model: Box<LocalModel>,
    },

-    /// A core engine expects to be wrapped with pre/post processors that handle tokenization.
-    StaticCore {
+    /// A Tokens engine receives tokens and expects to be wrapped with pre/post processors that handle tokenization.
+    InProcessTokens {
        engine: ExecutionContext,
        model: Box<LocalModel>,
        is_prefill: bool,
@@ -70,8 +70,8 @@ impl EngineConfig {
        use EngineConfig::*;
        match self {
            Dynamic(lm) => lm,
-            StaticFull { model, .. } => model,
-            StaticCore { model, .. } => model,
+            InProcessText { model, .. } => model,
+            InProcessTokens { model, .. } => model,
        }
    }
 }
--- a/lib/llm/src/entrypoint/input/common.rs
+++ b/lib/llm/src/entrypoint/input/common.rs
@@ -93,7 +93,7 @@ pub async fn prepare_engine(
                request_template: local_model.request_template(),
            })
        }
-        EngineConfig::StaticFull { engine, model, .. } => {
+        EngineConfig::InProcessText { engine, model, .. } => {
            let service_name = model.service_name().to_string();
            tracing::debug!("Model: {service_name} with engine pre-processing");
            let engine = Arc::new(StreamingEngineAdapter::new(engine));
@@ -105,7 +105,7 @@ pub async fn prepare_engine(
                card: Some(model.into_card()),
            })
        }
-        EngineConfig::StaticCore {
+        EngineConfig::InProcessTokens {
            engine: inner_engine,
            model,
            ..

--- a/lib/llm/src/entrypoint/input/endpoint.rs
+++ b/lib/llm/src/entrypoint/input/endpoint.rs
@@ -38,7 +38,7 @@ pub async fn run(
    let endpoint = component.endpoint(&endpoint_id.name);

    let rt_fut: Pin<Box<dyn Future<Output = _> + Send + 'static>> = match engine_config {
-        EngineConfig::StaticFull { engine, mut model } => {
+        EngineConfig::InProcessText { engine, mut model } => {
            let engine = Arc::new(StreamingEngineAdapter::new(engine));
            let ingress_chat = Ingress::<
                Context<NvCreateChatCompletionRequest>,
@@ -51,7 +51,7 @@ pub async fn run(

            Box::pin(fut_chat)
        }
-        EngineConfig::StaticCore {
+        EngineConfig::InProcessTokens {
            engine: inner_engine,
            mut model,
            is_prefill,
@@ -127,7 +127,7 @@ mod integration_tests {
            .await
            .map_err(|e| anyhow::anyhow!("Failed to create distributed runtime: {}", e))?;

-        let engine_config = EngineConfig::StaticFull {
+        let engine_config = EngineConfig::InProcessText {
            engine: crate::engines::make_echo_engine(),
            model: Box::new(
                crate::local_model::LocalModelBuilder::default()

--- a/lib/llm/src/entrypoint/input/grpc.rs
+++ b/lib/llm/src/entrypoint/input/grpc.rs
@@ -45,7 +45,7 @@ pub async fn run(
            .await?;
            grpc_service
        }
-        EngineConfig::StaticFull { engine, model, .. } => {
+        EngineConfig::InProcessText { engine, model, .. } => {
            let grpc_service = grpc_service_builder.build()?;
            let engine = Arc::new(StreamingEngineAdapter::new(engine));
            let manager = grpc_service.model_manager();
@@ -54,7 +54,7 @@ pub async fn run(
            manager.add_chat_completions_model(model.service_name(), checksum, engine)?;
            grpc_service
        }
-        EngineConfig::StaticCore {
+        EngineConfig::InProcessTokens {
            engine: inner_engine,
            model,
            ..

--- a/lib/llm/src/entrypoint/input/http.rs
+++ b/lib/llm/src/entrypoint/input/http.rs
@@ -87,7 +87,7 @@ pub async fn run(
            .await?;
            http_service
        }
-        EngineConfig::StaticFull { engine, model, .. } => {
+        EngineConfig::InProcessText { engine, model, .. } => {
            let http_service = http_service_builder.build()?;
            let engine = Arc::new(StreamingEngineAdapter::new(engine));
            let manager = http_service.model_manager();
@@ -101,7 +101,7 @@ pub async fn run(
            }
            http_service
        }
-        EngineConfig::StaticCore {
+        EngineConfig::InProcessTokens {
            engine: inner_engine,
            model,
            ..

--- a/lib/llm/tests/http_metrics.rs
+++ b/lib/llm/tests/http_metrics.rs
@@ -321,7 +321,7 @@ mod integration_tests {
            .unwrap();

        // Create EngineConfig with EchoEngine
-        let engine_config = EngineConfig::StaticFull {
+        let engine_config = EngineConfig::InProcessText {
            engine: make_echo_engine(),
            model: Box::new(local_model.clone()),
        };
@@ -355,9 +355,8 @@ mod integration_tests {
            model_watcher.watch(discovery_stream, None).await;
        });

-        // Set up the engine following the StaticFull pattern from http.rs
-        let EngineConfig::StaticFull { engine, model, .. } = engine_config else {
-            panic!("Expected StaticFull config");
+        let EngineConfig::InProcessText { engine, model, .. } = engine_config else {
+            panic!("Expected InProcessText config");
        };

        let card = local_model.card().clone();
@@ -373,7 +372,7 @@ mod integration_tests {
        let test_component = namespace.component("test-mdc-component").unwrap();
        let test_endpoint = test_component.endpoint("test-mdc-endpoint");

-        // This will store the MDC in etcd for discovery
+        // This will store the MDC in the key-value store for discovery
        local_model
            .attach(
                &test_endpoint,