Unverified Commit 0fc5273c authored by Graham King, committed by GitHub
Browse files

refactor(llm): Rename EngineConfig::Static to InProcess (#4585)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent a77558d4
......@@ -148,12 +148,12 @@ async fn engine_for(
// Auto-discover backends
Ok(EngineConfig::Dynamic(Box::new(local_model)))
}
Output::Echo => Ok(EngineConfig::StaticFull {
Output::Echo => Ok(EngineConfig::InProcessText {
model: Box::new(local_model),
engine: dynamo_llm::engines::make_echo_engine(),
}),
#[cfg(feature = "mistralrs")]
Output::MistralRs => Ok(EngineConfig::StaticFull {
Output::MistralRs => Ok(EngineConfig::InProcessText {
engine: dynamo_engine_mistralrs::make_engine(&local_model).await?,
model: Box::new(local_model),
}),
......@@ -164,7 +164,7 @@ async fn engine_for(
let engine =
dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?;
Ok(EngineConfig::StaticCore {
Ok(EngineConfig::InProcessTokens {
engine,
model: Box::new(local_model),
is_prefill: false,
......
......@@ -253,7 +253,7 @@ async fn select_engine(
let inner = match args.engine_type {
EngineType::Echo => {
// There is no validation for the echo engine
RsEngineConfig::StaticFull {
RsEngineConfig::InProcessText {
model: Box::new(local_model),
engine: dynamo_llm::engines::make_echo_engine(),
}
......@@ -284,7 +284,7 @@ async fn select_engine(
)
.await?;
RsEngineConfig::StaticCore {
RsEngineConfig::InProcessTokens {
engine,
model: Box::new(local_model),
is_prefill: args.is_prefill,
......
......@@ -51,14 +51,14 @@ pub enum EngineConfig {
/// Remote networked engines that we discover via etcd
Dynamic(Box<LocalModel>),
/// A Full service engine does it's own tokenization and prompt formatting.
StaticFull {
/// A Text engine receives text, does its own tokenization and prompt formatting.
InProcessText {
engine: Arc<dyn StreamingEngine>,
model: Box<LocalModel>,
},
/// A core engine expects to be wrapped with pre/post processors that handle tokenization.
StaticCore {
/// A Tokens engine receives tokens, expects to be wrapped with pre/post processors that handle tokenization.
InProcessTokens {
engine: ExecutionContext,
model: Box<LocalModel>,
is_prefill: bool,
......@@ -70,8 +70,8 @@ impl EngineConfig {
use EngineConfig::*;
match self {
Dynamic(lm) => lm,
StaticFull { model, .. } => model,
StaticCore { model, .. } => model,
InProcessText { model, .. } => model,
InProcessTokens { model, .. } => model,
}
}
}
......@@ -93,7 +93,7 @@ pub async fn prepare_engine(
request_template: local_model.request_template(),
})
}
EngineConfig::StaticFull { engine, model, .. } => {
EngineConfig::InProcessText { engine, model, .. } => {
let service_name = model.service_name().to_string();
tracing::debug!("Model: {service_name} with engine pre-processing");
let engine = Arc::new(StreamingEngineAdapter::new(engine));
......@@ -105,7 +105,7 @@ pub async fn prepare_engine(
card: Some(model.into_card()),
})
}
EngineConfig::StaticCore {
EngineConfig::InProcessTokens {
engine: inner_engine,
model,
..
......
......@@ -38,7 +38,7 @@ pub async fn run(
let endpoint = component.endpoint(&endpoint_id.name);
let rt_fut: Pin<Box<dyn Future<Output = _> + Send + 'static>> = match engine_config {
EngineConfig::StaticFull { engine, mut model } => {
EngineConfig::InProcessText { engine, mut model } => {
let engine = Arc::new(StreamingEngineAdapter::new(engine));
let ingress_chat = Ingress::<
Context<NvCreateChatCompletionRequest>,
......@@ -51,7 +51,7 @@ pub async fn run(
Box::pin(fut_chat)
}
EngineConfig::StaticCore {
EngineConfig::InProcessTokens {
engine: inner_engine,
mut model,
is_prefill,
......@@ -127,7 +127,7 @@ mod integration_tests {
.await
.map_err(|e| anyhow::anyhow!("Failed to create distributed runtime: {}", e))?;
let engine_config = EngineConfig::StaticFull {
let engine_config = EngineConfig::InProcessText {
engine: crate::engines::make_echo_engine(),
model: Box::new(
crate::local_model::LocalModelBuilder::default()
......
......@@ -45,7 +45,7 @@ pub async fn run(
.await?;
grpc_service
}
EngineConfig::StaticFull { engine, model, .. } => {
EngineConfig::InProcessText { engine, model, .. } => {
let grpc_service = grpc_service_builder.build()?;
let engine = Arc::new(StreamingEngineAdapter::new(engine));
let manager = grpc_service.model_manager();
......@@ -54,7 +54,7 @@ pub async fn run(
manager.add_chat_completions_model(model.service_name(), checksum, engine)?;
grpc_service
}
EngineConfig::StaticCore {
EngineConfig::InProcessTokens {
engine: inner_engine,
model,
..
......
......@@ -87,7 +87,7 @@ pub async fn run(
.await?;
http_service
}
EngineConfig::StaticFull { engine, model, .. } => {
EngineConfig::InProcessText { engine, model, .. } => {
let http_service = http_service_builder.build()?;
let engine = Arc::new(StreamingEngineAdapter::new(engine));
let manager = http_service.model_manager();
......@@ -101,7 +101,7 @@ pub async fn run(
}
http_service
}
EngineConfig::StaticCore {
EngineConfig::InProcessTokens {
engine: inner_engine,
model,
..
......
......@@ -321,7 +321,7 @@ mod integration_tests {
.unwrap();
// Create EngineConfig with EchoEngine
let engine_config = EngineConfig::StaticFull {
let engine_config = EngineConfig::InProcessText {
engine: make_echo_engine(),
model: Box::new(local_model.clone()),
};
......@@ -355,9 +355,8 @@ mod integration_tests {
model_watcher.watch(discovery_stream, None).await;
});
// Set up the engine following the StaticFull pattern from http.rs
let EngineConfig::StaticFull { engine, model, .. } = engine_config else {
panic!("Expected StaticFull config");
let EngineConfig::InProcessText { engine, model, .. } = engine_config else {
panic!("Expected InProcessText config");
};
let card = local_model.card().clone();
......@@ -373,7 +372,7 @@ mod integration_tests {
let test_component = namespace.component("test-mdc-component").unwrap();
let test_endpoint = test_component.endpoint("test-mdc-endpoint");
// This will store the MDC in etcd for discovery
// This will store the MDC in key-value store for discovery
local_model
.attach(
&test_endpoint,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment