Unverified commit 0fc5273c, authored by Graham King, committed by GitHub
Browse files

refactor(llm): Rename EngineConfig::Static to InProcess (#4585)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent a77558d4
...@@ -148,12 +148,12 @@ async fn engine_for( ...@@ -148,12 +148,12 @@ async fn engine_for(
// Auto-discover backends // Auto-discover backends
Ok(EngineConfig::Dynamic(Box::new(local_model))) Ok(EngineConfig::Dynamic(Box::new(local_model)))
} }
Output::Echo => Ok(EngineConfig::StaticFull { Output::Echo => Ok(EngineConfig::InProcessText {
model: Box::new(local_model), model: Box::new(local_model),
engine: dynamo_llm::engines::make_echo_engine(), engine: dynamo_llm::engines::make_echo_engine(),
}), }),
#[cfg(feature = "mistralrs")] #[cfg(feature = "mistralrs")]
Output::MistralRs => Ok(EngineConfig::StaticFull { Output::MistralRs => Ok(EngineConfig::InProcessText {
engine: dynamo_engine_mistralrs::make_engine(&local_model).await?, engine: dynamo_engine_mistralrs::make_engine(&local_model).await?,
model: Box::new(local_model), model: Box::new(local_model),
}), }),
...@@ -164,7 +164,7 @@ async fn engine_for( ...@@ -164,7 +164,7 @@ async fn engine_for(
let engine = let engine =
dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?; dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?;
Ok(EngineConfig::StaticCore { Ok(EngineConfig::InProcessTokens {
engine, engine,
model: Box::new(local_model), model: Box::new(local_model),
is_prefill: false, is_prefill: false,
......
...@@ -253,7 +253,7 @@ async fn select_engine( ...@@ -253,7 +253,7 @@ async fn select_engine(
let inner = match args.engine_type { let inner = match args.engine_type {
EngineType::Echo => { EngineType::Echo => {
// There is no validation for the echo engine // There is no validation for the echo engine
RsEngineConfig::StaticFull { RsEngineConfig::InProcessText {
model: Box::new(local_model), model: Box::new(local_model),
engine: dynamo_llm::engines::make_echo_engine(), engine: dynamo_llm::engines::make_echo_engine(),
} }
...@@ -284,7 +284,7 @@ async fn select_engine( ...@@ -284,7 +284,7 @@ async fn select_engine(
) )
.await?; .await?;
RsEngineConfig::StaticCore { RsEngineConfig::InProcessTokens {
engine, engine,
model: Box::new(local_model), model: Box::new(local_model),
is_prefill: args.is_prefill, is_prefill: args.is_prefill,
......
...@@ -51,14 +51,14 @@ pub enum EngineConfig { ...@@ -51,14 +51,14 @@ pub enum EngineConfig {
/// Remote networked engines that we discover via etcd /// Remote networked engines that we discover via etcd
Dynamic(Box<LocalModel>), Dynamic(Box<LocalModel>),
/// A Full service engine does its own tokenization and prompt formatting. /// A Text engine receives text, does its own tokenization and prompt formatting.
StaticFull { InProcessText {
engine: Arc<dyn StreamingEngine>, engine: Arc<dyn StreamingEngine>,
model: Box<LocalModel>, model: Box<LocalModel>,
}, },
/// A core engine expects to be wrapped with pre/post processors that handle tokenization. /// A Tokens engine receives tokens, expects to be wrapped with pre/post processors that handle tokenization.
StaticCore { InProcessTokens {
engine: ExecutionContext, engine: ExecutionContext,
model: Box<LocalModel>, model: Box<LocalModel>,
is_prefill: bool, is_prefill: bool,
...@@ -70,8 +70,8 @@ impl EngineConfig { ...@@ -70,8 +70,8 @@ impl EngineConfig {
use EngineConfig::*; use EngineConfig::*;
match self { match self {
Dynamic(lm) => lm, Dynamic(lm) => lm,
StaticFull { model, .. } => model, InProcessText { model, .. } => model,
StaticCore { model, .. } => model, InProcessTokens { model, .. } => model,
} }
} }
} }
...@@ -93,7 +93,7 @@ pub async fn prepare_engine( ...@@ -93,7 +93,7 @@ pub async fn prepare_engine(
request_template: local_model.request_template(), request_template: local_model.request_template(),
}) })
} }
EngineConfig::StaticFull { engine, model, .. } => { EngineConfig::InProcessText { engine, model, .. } => {
let service_name = model.service_name().to_string(); let service_name = model.service_name().to_string();
tracing::debug!("Model: {service_name} with engine pre-processing"); tracing::debug!("Model: {service_name} with engine pre-processing");
let engine = Arc::new(StreamingEngineAdapter::new(engine)); let engine = Arc::new(StreamingEngineAdapter::new(engine));
...@@ -105,7 +105,7 @@ pub async fn prepare_engine( ...@@ -105,7 +105,7 @@ pub async fn prepare_engine(
card: Some(model.into_card()), card: Some(model.into_card()),
}) })
} }
EngineConfig::StaticCore { EngineConfig::InProcessTokens {
engine: inner_engine, engine: inner_engine,
model, model,
.. ..
......
...@@ -38,7 +38,7 @@ pub async fn run( ...@@ -38,7 +38,7 @@ pub async fn run(
let endpoint = component.endpoint(&endpoint_id.name); let endpoint = component.endpoint(&endpoint_id.name);
let rt_fut: Pin<Box<dyn Future<Output = _> + Send + 'static>> = match engine_config { let rt_fut: Pin<Box<dyn Future<Output = _> + Send + 'static>> = match engine_config {
EngineConfig::StaticFull { engine, mut model } => { EngineConfig::InProcessText { engine, mut model } => {
let engine = Arc::new(StreamingEngineAdapter::new(engine)); let engine = Arc::new(StreamingEngineAdapter::new(engine));
let ingress_chat = Ingress::< let ingress_chat = Ingress::<
Context<NvCreateChatCompletionRequest>, Context<NvCreateChatCompletionRequest>,
...@@ -51,7 +51,7 @@ pub async fn run( ...@@ -51,7 +51,7 @@ pub async fn run(
Box::pin(fut_chat) Box::pin(fut_chat)
} }
EngineConfig::StaticCore { EngineConfig::InProcessTokens {
engine: inner_engine, engine: inner_engine,
mut model, mut model,
is_prefill, is_prefill,
...@@ -127,7 +127,7 @@ mod integration_tests { ...@@ -127,7 +127,7 @@ mod integration_tests {
.await .await
.map_err(|e| anyhow::anyhow!("Failed to create distributed runtime: {}", e))?; .map_err(|e| anyhow::anyhow!("Failed to create distributed runtime: {}", e))?;
let engine_config = EngineConfig::StaticFull { let engine_config = EngineConfig::InProcessText {
engine: crate::engines::make_echo_engine(), engine: crate::engines::make_echo_engine(),
model: Box::new( model: Box::new(
crate::local_model::LocalModelBuilder::default() crate::local_model::LocalModelBuilder::default()
......
...@@ -45,7 +45,7 @@ pub async fn run( ...@@ -45,7 +45,7 @@ pub async fn run(
.await?; .await?;
grpc_service grpc_service
} }
EngineConfig::StaticFull { engine, model, .. } => { EngineConfig::InProcessText { engine, model, .. } => {
let grpc_service = grpc_service_builder.build()?; let grpc_service = grpc_service_builder.build()?;
let engine = Arc::new(StreamingEngineAdapter::new(engine)); let engine = Arc::new(StreamingEngineAdapter::new(engine));
let manager = grpc_service.model_manager(); let manager = grpc_service.model_manager();
...@@ -54,7 +54,7 @@ pub async fn run( ...@@ -54,7 +54,7 @@ pub async fn run(
manager.add_chat_completions_model(model.service_name(), checksum, engine)?; manager.add_chat_completions_model(model.service_name(), checksum, engine)?;
grpc_service grpc_service
} }
EngineConfig::StaticCore { EngineConfig::InProcessTokens {
engine: inner_engine, engine: inner_engine,
model, model,
.. ..
......
...@@ -87,7 +87,7 @@ pub async fn run( ...@@ -87,7 +87,7 @@ pub async fn run(
.await?; .await?;
http_service http_service
} }
EngineConfig::StaticFull { engine, model, .. } => { EngineConfig::InProcessText { engine, model, .. } => {
let http_service = http_service_builder.build()?; let http_service = http_service_builder.build()?;
let engine = Arc::new(StreamingEngineAdapter::new(engine)); let engine = Arc::new(StreamingEngineAdapter::new(engine));
let manager = http_service.model_manager(); let manager = http_service.model_manager();
...@@ -101,7 +101,7 @@ pub async fn run( ...@@ -101,7 +101,7 @@ pub async fn run(
} }
http_service http_service
} }
EngineConfig::StaticCore { EngineConfig::InProcessTokens {
engine: inner_engine, engine: inner_engine,
model, model,
.. ..
......
...@@ -321,7 +321,7 @@ mod integration_tests { ...@@ -321,7 +321,7 @@ mod integration_tests {
.unwrap(); .unwrap();
// Create EngineConfig with EchoEngine // Create EngineConfig with EchoEngine
let engine_config = EngineConfig::StaticFull { let engine_config = EngineConfig::InProcessText {
engine: make_echo_engine(), engine: make_echo_engine(),
model: Box::new(local_model.clone()), model: Box::new(local_model.clone()),
}; };
...@@ -355,9 +355,8 @@ mod integration_tests { ...@@ -355,9 +355,8 @@ mod integration_tests {
model_watcher.watch(discovery_stream, None).await; model_watcher.watch(discovery_stream, None).await;
}); });
// Set up the engine following the StaticFull pattern from http.rs let EngineConfig::InProcessText { engine, model, .. } = engine_config else {
let EngineConfig::StaticFull { engine, model, .. } = engine_config else { panic!("Expected InProcessText config");
panic!("Expected StaticFull config");
}; };
let card = local_model.card().clone(); let card = local_model.card().clone();
...@@ -373,7 +372,7 @@ mod integration_tests { ...@@ -373,7 +372,7 @@ mod integration_tests {
let test_component = namespace.component("test-mdc-component").unwrap(); let test_component = namespace.component("test-mdc-component").unwrap();
let test_endpoint = test_component.endpoint("test-mdc-endpoint"); let test_endpoint = test_component.endpoint("test-mdc-endpoint");
// This will store the MDC in etcd for discovery // This will store the MDC in key-value store for discovery
local_model local_model
.attach( .attach(
&test_endpoint, &test_endpoint,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment