Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
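
The hunks below are a mechanical crate rename: `triton_distributed` becomes `triton_distributed_runtime`, and `triton_llm` becomes `triton_distributed_llm`. As a minimal before/after sketch of what this means for imports (paths taken directly from the hunks; no API changes are implied beyond the new crate names):

```rust
// Old crate names (before this commit):
//   use triton_distributed::slug::Slug;
//   use triton_llm::protocols::openai::completions::CompletionRequest;
//
// New crate names (after this commit):
use triton_distributed_runtime::slug::Slug;
use triton_distributed_llm::protocols::openai::completions::CompletionRequest;
```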
@@ -37,7 +37,7 @@ use std::time::Duration;
 use derive_builder::Builder;
-use triton_distributed::slug::Slug;
+use triton_distributed_runtime::slug::Slug;
 pub const BUCKET_NAME: &str = "mdc";

@@ -35,11 +35,11 @@ use tracing;
 use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
 use crate::preprocessor::prompt::OAIChatLikeRequest;
-use triton_distributed::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use triton_distributed::pipeline::{
+use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use triton_distributed_runtime::pipeline::{
     async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
 };
-use triton_distributed::protocols::annotated::{Annotated, AnnotationsProvider};
+use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
 use crate::protocols::{
     common::{SamplingOptionsProvider, StopConditionsProvider},

@@ -36,7 +36,7 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
 // has become the common response envelope for triton-distributed.
 // We may want to move the original Annotated back here and has a Infallible conversion to the the
 // ResponseEnvelop in triton-distributed.
-pub use triton_distributed::protocols::annotated::Annotated;
+pub use triton_distributed_runtime::protocols::annotated::Annotated;
 /// The LLM responses have multiple different fields and nests of objects to get to the actual
 /// text completion returned. This trait can be applied to the `choice` level objects to extract

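Since the changed line is a `pub use` re-export, downstream code that imports `Annotated` through the LLM crate keeps compiling unchanged; only the source crate behind the re-export moves. A hedged illustration (the `protocols` module location of this file is an assumption, not something the diff confirms):

```rust
// Both paths name the same type after this commit: the first resolves
// through the `pub use` re-export above, the second goes to the runtime
// crate directly. The module path on the first line is assumed.
use triton_distributed_llm::protocols::Annotated;
// use triton_distributed_runtime::protocols::annotated::Annotated; // equivalent
```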
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-/// Forward openai_api_rs::v1 to triton_llm::protocols::openai::v1
+/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
 pub mod chat_completions;
 pub mod completions;
 pub mod models;

@@ -40,7 +40,8 @@ use super::{
     validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider,
     OpenAIStopConditionsProvider,
 };
-use triton_distributed::protocols::annotated::AnnotationsProvider;
+use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
 /// Request object which is used to generate chat completions.
 #[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]

@@ -187,7 +188,7 @@ pub struct ChatCompletionRequest {
 /// in the vocabulary of the model.
 ///
 /// ```
-/// use triton_llm::protocols::openai::completions::CompletionRequest;
+/// use triton_distributed_llm::protocols::openai::completions::CompletionRequest;
 ///
 /// let request = CompletionRequest::builder()
 ///     .prompt("What is the meaning of life?")

@@ -271,7 +272,7 @@ impl ChatCompletionRequestBuilder {
 /// Builds and validates the ChatCompletionRequest
 ///
 /// ```rust
-/// use triton_llm::protocols::openai::chat_completions::ChatCompletionRequest;
+/// use triton_distributed_llm::protocols::openai::chat_completions::ChatCompletionRequest;
 ///
 /// let request = ChatCompletionRequest::builder()
 ///     .model("mixtral-8x7b-instruct-v0.1")

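The renamed doctest paths above exercise the builders generated by `#[derive(Builder)]` (derive_builder is imported in the first hunk). As a rough sketch of the full call shape under the new crate name, assuming derive_builder's fallible `build()` and that the fields the truncated doctests omit have defaults:

```rust
use triton_distributed_llm::protocols::openai::chat_completions::ChatCompletionRequest;

fn main() {
    // `model` and its value come from the doctest in the hunk above; whether
    // `build()` succeeds without setting further fields is an assumption
    // about the builder's defaults and validation.
    let request = ChatCompletionRequest::builder()
        .model("mixtral-8x7b-instruct-v0.1")
        .build()
        .expect("request should pass builder validation");

    // Debug is derived on the struct (see the #[derive(...)] in the hunk
    // touching AnnotationsProvider), so the whole request can be printed.
    println!("{request:?}");
}
```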