Unverified Commit ca31b3aa authored by Graham King's avatar Graham King Committed by GitHub
Browse files

chore: Remove binding OAIChatPreprocessor (#5516)


Signed-off-by: default avatarGraham King <grahamk@nvidia.com>
parent 91ddf418
...@@ -159,7 +159,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { ...@@ -159,7 +159,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<llm::kv::WorkerMetricsPublisher>()?; m.add_class::<llm::kv::WorkerMetricsPublisher>()?;
m.add_class::<llm::model_card::ModelDeploymentCard>()?; m.add_class::<llm::model_card::ModelDeploymentCard>()?;
m.add_class::<llm::local_model::ModelRuntimeConfig>()?; m.add_class::<llm::local_model::ModelRuntimeConfig>()?;
m.add_class::<llm::preprocessor::OAIChatPreprocessor>()?;
m.add_class::<llm::preprocessor::MediaDecoder>()?; m.add_class::<llm::preprocessor::MediaDecoder>()?;
m.add_class::<llm::preprocessor::MediaFetcher>()?; m.add_class::<llm::preprocessor::MediaFetcher>()?;
m.add_class::<llm::kv::OverlapScores>()?; m.add_class::<llm::kv::OverlapScores>()?;
......
...@@ -2,80 +2,9 @@ ...@@ -2,80 +2,9 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use super::*; use super::*;
use crate::llm::model_card::ModelDeploymentCard;
use std::time::Duration; use std::time::Duration;
use llm_rs::{ use llm_rs::preprocessor::media::{MediaDecoder as RsMediaDecoder, MediaFetcher as RsMediaFetcher};
preprocessor::OpenAIPreprocessor,
preprocessor::media::{MediaDecoder as RsMediaDecoder, MediaFetcher as RsMediaFetcher},
protocols::common::llm_backend::{BackendOutput, PreprocessedRequest},
types::{
Annotated,
openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
},
},
};
use dynamo_runtime::pipeline::{
ManyOut, Operator, PushRouter, SegmentSink, ServiceFrontend, SingleIn, Source,
};
#[pyclass]
pub(crate) struct OAIChatPreprocessor {
inner: Arc<llm_rs::preprocessor::OpenAIPreprocessor>,
current: Endpoint,
next: Endpoint,
}
#[pymethods]
impl OAIChatPreprocessor {
#[new]
fn new(mdc: ModelDeploymentCard, current: Endpoint, next: Endpoint) -> PyResult<Self> {
let preprocessor = OpenAIPreprocessor::new(mdc.inner.clone()).map_err(to_pyerr)?;
Ok(Self {
inner: preprocessor,
current,
next,
})
}
fn start<'p>(&self, py: Python<'p>) -> PyResult<Bound<'p, PyAny>> {
let frontend = ServiceFrontend::<
SingleIn<NvCreateChatCompletionRequest>,
ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
>::new();
let network =
SegmentSink::<SingleIn<PreprocessedRequest>, ManyOut<Annotated<BackendOutput>>>::new();
let preprocessor = self.inner.into_operator();
let pipeline = frontend
.link(preprocessor.forward_edge())
.map_err(to_pyerr)?
.link(network.clone())
.map_err(to_pyerr)?
.link(preprocessor.backward_edge())
.map_err(to_pyerr)?
.link(frontend)
.map_err(to_pyerr)?;
let ingress = Ingress::for_engine(pipeline).map_err(to_pyerr)?;
let builder = self.current.inner.endpoint_builder().handler(ingress);
let endpoint = Arc::new(self.next.inner.clone());
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let client = endpoint.client().await.map_err(to_pyerr)?;
let router = PushRouter::<PreprocessedRequest, Annotated<BackendOutput>>::from_client(
client,
Default::default(),
)
.await
.map_err(to_pyerr)?;
network.attach(Arc::new(router)).map_err(to_pyerr)?;
builder.start().await.map_err(to_pyerr)?;
Ok(())
})
}
}
#[pyclass] #[pyclass]
#[derive(Clone)] #[derive(Clone)]
......
...@@ -534,19 +534,6 @@ class ModelRuntimeConfig: ...@@ -534,19 +534,6 @@ class ModelRuntimeConfig:
"""Get an engine-specific runtime configuration value""" """Get an engine-specific runtime configuration value"""
... ...
class OAIChatPreprocessor:
"""
A preprocessor for OpenAI chat completions
"""
...
async def start(self) -> None:
"""
Start the preprocessor
"""
...
class OverlapScores: class OverlapScores:
""" """
A collection of prefix matching scores of workers for a given token ids. A collection of prefix matching scores of workers for a given token ids.
...@@ -1670,7 +1657,6 @@ __all__ = [ ...@@ -1670,7 +1657,6 @@ __all__ = [
"Context", "Context",
"KserveGrpcService", "KserveGrpcService",
"ModelDeploymentCard", "ModelDeploymentCard",
"OAIChatPreprocessor",
"PythonAsyncEngine", "PythonAsyncEngine",
"prometheus_names", "prometheus_names",
] ]
...@@ -17,7 +17,6 @@ from dynamo._core import DistributedRuntime as DistributedRuntime ...@@ -17,7 +17,6 @@ from dynamo._core import DistributedRuntime as DistributedRuntime
from dynamo._core import Endpoint as Endpoint from dynamo._core import Endpoint as Endpoint
from dynamo._core import ModelDeploymentCard as ModelDeploymentCard from dynamo._core import ModelDeploymentCard as ModelDeploymentCard
from dynamo._core import Namespace as Namespace from dynamo._core import Namespace as Namespace
from dynamo._core import OAIChatPreprocessor as OAIChatPreprocessor
def dynamo_worker(enable_nats: bool = True): def dynamo_worker(enable_nats: bool = True):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment