Commit 1af7433b authored by Neelay Shah, committed by GitHub

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
@@ -35,11 +35,11 @@ use tracing;
use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
use crate::preprocessor::prompt::OAIChatLikeRequest;
-use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use triton_distributed_runtime::pipeline::{
+use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use dynemo_runtime::pipeline::{
async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
};
-use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
+use dynemo_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
use crate::protocols::{
common::{SamplingOptionsProvider, StopConditionsProvider},
......
@@ -13,10 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-//! # Triton LLM Protocols
+//! # Dynemo LLM Protocols
//!
//! This module contains the protocols, i.e. messages formats, used to exchange requests and responses
-//! both publicly via the HTTP API and internally between Triton components.
+//! both publicly via the HTTP API and internally between Dynemo components.
//!
use futures::{Stream, StreamExt};
@@ -33,10 +33,10 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// TODO: This is an awkward dependency that we need to address
// Originally, all the Annotated/SSE Codec bits were in the LLM protocol module; however, [Annotated]
-// has become the common response envelope for triton-distributed.
+// has become the common response envelope for dynemo-distributed.
// We may want to move the original Annotated back here and have an Infallible conversion to the
-// ResponseEnvelop in triton-distributed.
-pub use triton_distributed_runtime::protocols::annotated::Annotated;
+// ResponseEnvelop in dynemo-distributed.
+pub use dynemo_runtime::protocols::annotated::Annotated;
/// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract
......
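The TODO above explains why `Annotated` now lives in `dynemo_runtime` and is only re-exported here: it has become the common response envelope shared by every component. As a rough sketch of that envelope pattern — a local stand-in with hypothetical field names, since the real `dynemo_runtime` type's fields are not visible in this diff — the idea is to pair an optional payload with optional out-of-band metadata so each stream item can carry either data or control information:

```rust
/// Conceptual stand-in for a response envelope; the name and fields are
/// assumptions, not the actual dynemo_runtime definition.
#[derive(Debug, Clone)]
pub struct EnvelopeSketch<T> {
    /// The actual response chunk, if this stream item carries data.
    pub data: Option<T>,
    /// An SSE-style event name, e.g. an error or control event.
    pub event: Option<String>,
    /// Out-of-band annotations attached to this item.
    pub comments: Option<Vec<String>>,
}

impl<T> EnvelopeSketch<T> {
    /// Wrap a plain payload with no extra metadata.
    pub fn from_data(data: T) -> Self {
        Self { data: Some(data), event: None, comments: None }
    }
}
```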
@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
use super::{SamplingOptions, StopConditions};
use crate::protocols::TokenIdType;
-/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`triton-llm-preprocessor`]
+/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`dynemo-llm-preprocessor`]
/// crate is responsible for converting request from the public APIs to this internal representation.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
......
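The doc comment above describes the preprocessing boundary: public API requests are converted into this internal, token-level form. The sketch below only illustrates that direction of conversion; the real `PreprocessedRequest` fields are elided in this diff, so every `*Sketch` type, every field name, and the `u32` choice for `TokenIdType` are assumptions made for the example.

```rust
type TokenIdType = u32; // assumption: the diff only shows that a TokenIdType alias exists

#[derive(Debug, Clone, Default)]
struct StopConditionsSketch {
    max_tokens: Option<u32>,
    stop: Vec<String>,
}

#[derive(Debug, Clone, Default)]
struct SamplingOptionsSketch {
    temperature: Option<f32>,
    top_p: Option<f32>,
}

/// Hypothetical internal request: the prompt is already tokenized and the
/// public-API knobs are normalized into stop/sampling structures.
#[derive(Debug, Clone)]
struct PreprocessedRequestSketch {
    token_ids: Vec<TokenIdType>,
    stop_conditions: StopConditionsSketch,
    sampling_options: SamplingOptionsSketch,
}

/// Illustrative preprocessor step: tokenize the prompt and carry the
/// normalized options forward into the internal representation.
fn preprocess(
    prompt: &str,
    tokenize: impl Fn(&str) -> Vec<TokenIdType>,
) -> PreprocessedRequestSketch {
    PreprocessedRequestSketch {
        token_ids: tokenize(prompt),
        stop_conditions: StopConditionsSketch::default(),
        sampling_options: SamplingOptionsSketch::default(),
    }
}

fn main() {
    // Toy tokenizer: one id per whitespace-separated word.
    let req = preprocess("hello world", |s| s.split_whitespace().map(|_| 0).collect());
    assert_eq!(req.token_ids.len(), 2);
}
```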
@@ -13,7 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
pub mod chat_completions;
pub mod completions;
pub mod models;
......
@@ -17,8 +17,8 @@ use super::nvext::NvExt;
use super::nvext::NvExtProvider;
use super::OpenAISamplingOptionsProvider;
use super::OpenAIStopConditionsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
use validator::Validate;
mod aggregator;
......
@@ -31,7 +31,7 @@ use super::{
CompletionUsage, ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
};
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct CompletionRequest {
......
@@ -13,7 +13,7 @@
#### HuggingFace Tokenizer
```rust
-use triton_distributed_llm::tokenizers::hf::HuggingFaceTokenizer;
+use dynemo_llm::tokenizers::hf::HuggingFaceTokenizer;
let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
.expect("Failed to load HuggingFace tokenizer");
@@ -22,7 +22,7 @@ let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/Tin
### Encoding and Decoding Text
```rust
-use triton_distributed_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
+use dynemo_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
let tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
.expect("Failed to load HuggingFace tokenizer");
@@ -40,7 +40,7 @@ assert_eq!(text, decoded_text);
// Using the Sequence object for encoding and decoding
-use triton_distributed_llm::tokenizers::{Sequence, Tokenizer};
+use dynemo_llm::tokenizers::{Sequence, Tokenizer};
use std::sync::{Arc, RwLock};
let tokenizer = Tokenizer::from(Arc::new(tokenizer));
@@ -51,4 +51,4 @@ sequence.append_text("Your sample text here")
let delta = sequence.append_token_id(1337)
.expect("Failed to append token_id");
-```
\ No newline at end of file
+```
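The `Sequence` example above relies on incremental detokenization: each appended token id yields only the newly produced text (the delta). The self-contained sketch below illustrates that idea without depending on the dynemo-llm API; the `SequenceSketch` type and its toy vocabulary are assumptions made purely for illustration.

```rust
use std::collections::HashMap;

/// Toy incremental decoder mirroring the delta-based `Sequence` usage above:
/// appending a token id returns only the text produced by that token.
struct SequenceSketch {
    vocab: HashMap<u32, String>, // stand-in for a real tokenizer vocabulary
    decoded: String,             // everything decoded so far
}

impl SequenceSketch {
    fn append_token_id(&mut self, id: u32) -> Option<String> {
        let piece = self.vocab.get(&id)?.clone();
        self.decoded.push_str(&piece);
        Some(piece) // the delta, analogous to `delta` in the docs above
    }
}

fn main() {
    let vocab = HashMap::from([(1336_u32, "Hello".to_string()), (1337, " world".to_string())]);
    let mut seq = SequenceSketch { vocab, decoded: String::new() };
    assert_eq!(seq.append_token_id(1336).as_deref(), Some("Hello"));
    assert_eq!(seq.append_token_id(1337).as_deref(), Some(" world"));
    assert_eq!(seq.decoded, "Hello world");
}
```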
@@ -19,7 +19,7 @@ pub use protocols::{Annotated, TokenIdType};
pub mod openai {
use super::*;
-use triton_distributed_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
+use dynemo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod completions {
use super::*;
......
@@ -13,12 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use futures::StreamExt;
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
codec::{create_message_stream, Message, SseCodecError},
openai::{chat_completions::NvCreateChatCompletionResponse, completions::CompletionResponse},
ContentProvider, DataStream,
};
+use futures::StreamExt;
const CMPL_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/completions";
const CHAT_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions";
......
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use triton_distributed_llm::backend::Backend;
-use triton_distributed_llm::model_card::model::ModelDeploymentCard;
+use dynemo_llm::backend::Backend;
+use dynemo_llm::model_card::model::ModelDeploymentCard;
#[tokio::test]
async fn test_sequence_factory() {
......
@@ -15,28 +15,28 @@
use anyhow::Error;
use async_stream::stream;
-use prometheus::{proto::MetricType, Registry};
-use reqwest::StatusCode;
-use std::sync::Arc;
-use triton_distributed_llm::http::service::{
+use dynemo_llm::http::service::{
error::HttpError,
metrics::{Endpoint, RequestType, Status},
service_v2::HttpService,
Metrics,
};
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
completions::{CompletionRequest, CompletionResponse},
},
Annotated,
};
-use triton_distributed_runtime::{
+use dynemo_runtime::{
pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, SingleIn,
},
CancellationToken,
};
+use prometheus::{proto::MetricType, Registry};
+use reqwest::StatusCode;
+use std::sync::Arc;
struct CounterEngine {}
......
@@ -13,10 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
use tempfile::tempdir;
-use triton_distributed_llm::model_card::model::{
-    ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind,
-};
const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
......
@@ -14,8 +14,8 @@
// limitations under the License.
use async_openai::types::CreateCompletionRequestArgs;
+use dynemo_llm::protocols::openai::{self, completions::CompletionRequest};
use serde::{Deserialize, Serialize};
-use triton_distributed_llm::protocols::openai::{self, completions::CompletionRequest};
#[derive(Serialize, Deserialize, Debug, Clone)]
struct CompletionSample {
......
@@ -15,10 +15,10 @@
use anyhow::Ok;
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
+use dynemo_llm::preprocessor::prompt::PromptFormatter;
+use dynemo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use serde::{Deserialize, Serialize};
-use triton_distributed_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
-use triton_distributed_llm::preprocessor::prompt::PromptFormatter;
-use triton_distributed_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use hf_hub::{api::tokio::ApiBuilder, Cache, Repo, RepoType};
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
expression: request
---
{
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and logit_bias fields with the logits_bias having two key/value pairs"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and temperature fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and top_p fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and stop fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and stream fields"
expression: sample.request
---
......
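The snapshot descriptions above list which fields each captured completion request is expected to contain. As a rough illustration of the shape being asserted — the actual snapshot bodies are elided here, the field values below are invented, and the model name is only inferred from the replay path — the temperature variant would serialize to JSON along these lines:

```rust
use serde_json::json;

fn main() {
    // Shape only: the real snapshots record the exact requests replayed by the tests.
    let sample = json!({
        "model": "meta/llama-3.1-8b-instruct",
        "prompt": "Your sample text here",
        "temperature": 0.7
    });
    println!("{}", serde_json::to_string_pretty(&sample).unwrap());
}
```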