Commit 1af7433b authored by Neelay Shah, committed by GitHub

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
@@ -35,11 +35,11 @@ use tracing;
 use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
 use crate::preprocessor::prompt::OAIChatLikeRequest;
-use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use triton_distributed_runtime::pipeline::{
+use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use dynemo_runtime::pipeline::{
     async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
 };
-use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
+use dynemo_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
 use crate::protocols::{
     common::{SamplingOptionsProvider, StopConditionsProvider},
...
@@ -13,10 +13,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//! # Triton LLM Protocols
+//! # Dynemo LLM Protocols
 //!
 //! This module contains the protocols, i.e. messages formats, used to exchange requests and responses
-//! both publicly via the HTTP API and internally between Triton components.
+//! both publicly via the HTTP API and internally between Dynemo components.
 //!
 use futures::{Stream, StreamExt};
@@ -33,10 +33,10 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
 // TODO: This is an awkward dependency that we need to address
 // Originally, all the Annotated/SSE Codec bits where in the LLM protocol module; however, [Annotated]
-// has become the common response envelope for triton-distributed.
+// has become the common response envelope for dynemo-distributed.
 // We may want to move the original Annotated back here and has a Infallible conversion to the the
-// ResponseEnvelop in triton-distributed.
+// ResponseEnvelop in dynemo-distributed.
-pub use triton_distributed_runtime::protocols::annotated::Annotated;
+pub use dynemo_runtime::protocols::annotated::Annotated;
 /// The LLM responses have multiple different fields and nests of objects to get to the actual
 /// text completion returned. This trait can be applied to the `choice` level objects to extract
...
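Since `Annotated` is now re-exported from `dynemo_runtime` as the common response envelope, downstream code would compose it with this module's `DataStream` alias roughly as in the minimal sketch below. That `Annotated<T>` stays generic is an assumption, and `MyResponse` plus the alias name are hypothetical:

```rust
use dynemo_llm::protocols::{Annotated, DataStream};

// Hypothetical payload type, standing in for a real response struct.
pub struct MyResponse {
    pub text: String,
}

// Streams of annotated payloads are the "common response envelope"
// described in the module docs above (generic shape of `Annotated<T>` assumed).
pub type MyResponseStream = DataStream<Annotated<MyResponse>>;
```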
@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
 use super::{SamplingOptions, StopConditions};
 use crate::protocols::TokenIdType;
-/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`triton-llm-preprocessor`]
+/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`dynemo-llm-preprocessor`]
 /// crate is responsible for converting request from the public APIs to this internal representation.
 #[derive(Serialize, Deserialize, Debug, Clone, Builder)]
 pub struct PreprocessedRequest {
...
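Because `PreprocessedRequest` derives `Builder` (the `derive_builder` crate), construction presumably goes through a generated `PreprocessedRequestBuilder`. The sketch below is illustrative only: the field names `token_ids`, `sampling_options`, and `stop_conditions` are inferred from the imports in this hunk, and the `Default` impls are assumptions:

```rust
// Illustrative sketch; assumes the items from the hunk above are in scope.
fn build_request() -> anyhow::Result<PreprocessedRequest> {
    let request = PreprocessedRequestBuilder::default()
        .token_ids(vec![1, 2, 3])                     // assumed field of Vec<TokenIdType>
        .sampling_options(SamplingOptions::default()) // assumed field + Default impl
        .stop_conditions(StopConditions::default())   // assumed field + Default impl
        .build()?; // derive_builder's build() returns a Result
    Ok(request)
}
```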
@@ -13,7 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
 pub mod chat_completions;
 pub mod completions;
 pub mod models;
...
@@ -17,8 +17,8 @@ use super::nvext::NvExt;
 use super::nvext::NvExtProvider;
 use super::OpenAISamplingOptionsProvider;
 use super::OpenAIStopConditionsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
 use validator::Validate;
 mod aggregator;
...
@@ -31,7 +31,7 @@ use super::{
     CompletionUsage, ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
 };
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct CompletionRequest {
...
@@ -13,7 +13,7 @@
 #### HuggingFace Tokenizer
 ```rust
-use triton_distributed_llm::tokenizers::hf::HuggingFaceTokenizer;
+use dynemo_llm::tokenizers::hf::HuggingFaceTokenizer;
 let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
     .expect("Failed to load HuggingFace tokenizer");
@@ -22,7 +22,7 @@ let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/Tin
 ### Encoding and Decoding Text
 ```rust
-use triton_distributed_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
+use dynemo_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
 let tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
     .expect("Failed to load HuggingFace tokenizer");
@@ -40,7 +40,7 @@ assert_eq!(text, decoded_text);
 // Using the Sequence object for encoding and decoding
-use triton_distributed_llm::tokenizers::{Sequence, Tokenizer};
+use dynemo_llm::tokenizers::{Sequence, Tokenizer};
 use std::sync::{Arc, RwLock};
 let tokenizer = Tokenizer::from(Arc::new(tokenizer));
@@ -51,4 +51,4 @@ sequence.append_text("Your sample text here")
 let delta = sequence.append_token_id(1337)
     .expect("Failed to append token_id");
 ```
\ No newline at end of file
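Pieced together from the README fragments in this hunk, a full round trip with the renamed crate might read as below. Every call except `create_sequence` appears verbatim above; how a `Sequence` is constructed falls in a collapsed region, so that method name is a hypothetical stand-in:

```rust
use std::sync::Arc;

use dynemo_llm::tokenizers::hf::HuggingFaceTokenizer;
use dynemo_llm::tokenizers::{Sequence, Tokenizer};

fn main() {
    // Load the tokenizer exactly as the README above does.
    let hf_tokenizer = HuggingFaceTokenizer::from_file(
        "tests/data/sample-models/TinyLlama_v1.1/tokenizer.json",
    )
    .expect("Failed to load HuggingFace tokenizer");

    // Wrap it in the shared `Tokenizer` handle, as shown in the README.
    let tokenizer = Tokenizer::from(Arc::new(hf_tokenizer));

    // Hypothetical constructor: the README's `sequence` is created in a
    // collapsed region, so this method name is assumed.
    let mut sequence: Sequence = tokenizer.create_sequence();
    sequence
        .append_text("Your sample text here")
        .expect("Failed to append text"); // Result return assumed

    // Appending a token id yields the incremental decoded text ("delta").
    let _delta = sequence
        .append_token_id(1337)
        .expect("Failed to append token_id");
}
```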
@@ -19,7 +19,7 @@ pub use protocols::{Annotated, TokenIdType};
 pub mod openai {
     use super::*;
-    use triton_distributed_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
+    use dynemo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
     pub mod completions {
         use super::*;
...
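The `ServerStreamingEngine` and `UnaryEngine` imports suggest this module wires up per-API engine aliases. A speculative sketch follows; the alias name and its type parameters are guesses patterned on the surrounding modules, not content of this diff:

```rust
use dynemo_llm::protocols::{
    openai::completions::{CompletionRequest, CompletionResponse},
    Annotated,
};
use dynemo_runtime::pipeline::ServerStreamingEngine;

// Hypothetical alias: a streaming completions engine maps one request to a
// stream of annotated responses. Name and parameters are illustrative.
pub type CompletionEngine =
    ServerStreamingEngine<CompletionRequest, Annotated<CompletionResponse>>;
```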
@@ -13,12 +13,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use futures::StreamExt;
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
     codec::{create_message_stream, Message, SseCodecError},
     openai::{chat_completions::NvCreateChatCompletionResponse, completions::CompletionResponse},
     ContentProvider, DataStream,
 };
+use futures::StreamExt;
 const CMPL_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/completions";
 const CHAT_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions";
...
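For context on what this test consumes, here is a sketch of decoding a recorded SSE replay with the codec items imported above. The signature of `create_message_stream` (raw SSE text in, a stream of framed messages out) is an assumption, not confirmed by the visible hunks:

```rust
use dynemo_llm::protocols::codec::{create_message_stream, Message, SseCodecError};
use futures::StreamExt;

// Count the well-formed frames in a replay file's raw SSE text.
async fn count_messages(raw_sse: &str) -> usize {
    let stream = create_message_stream(raw_sse); // signature assumed
    stream
        .filter_map(|frame: Result<Message, SseCodecError>| async move { frame.ok() })
        .count()
        .await
}
```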
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use triton_distributed_llm::backend::Backend;
-use triton_distributed_llm::model_card::model::ModelDeploymentCard;
+use dynemo_llm::backend::Backend;
+use dynemo_llm::model_card::model::ModelDeploymentCard;
 #[tokio::test]
 async fn test_sequence_factory() {
...
@@ -15,28 +15,28 @@
 use anyhow::Error;
 use async_stream::stream;
-use prometheus::{proto::MetricType, Registry};
-use reqwest::StatusCode;
-use std::sync::Arc;
-use triton_distributed_llm::http::service::{
+use dynemo_llm::http::service::{
     error::HttpError,
     metrics::{Endpoint, RequestType, Status},
     service_v2::HttpService,
     Metrics,
 };
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
     openai::{
         chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
         completions::{CompletionRequest, CompletionResponse},
     },
     Annotated,
 };
-use triton_distributed_runtime::{
+use dynemo_runtime::{
     pipeline::{
         async_trait, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, SingleIn,
     },
     CancellationToken,
 };
+use prometheus::{proto::MetricType, Registry};
+use reqwest::StatusCode;
+use std::sync::Arc;
 struct CounterEngine {}
...
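The `CounterEngine` scaffold plus the `AsyncEngine`/`SingleIn`/`ManyOut` imports imply an engine implemented roughly as below. The trait generics and the `generate` signature are patterned on those imports and are assumptions, as is wiring the response stream to the request's context:

```rust
use dynemo_llm::protocols::{
    openai::chat_completions::{
        NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
    },
    Annotated,
};
use dynemo_runtime::pipeline::{
    async_trait, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, ResponseStream, SingleIn,
};

struct CounterEngine {}

// Assumed trait shape: one request in, a stream of annotated responses out.
#[async_trait]
impl
    AsyncEngine<
        SingleIn<NvCreateChatCompletionRequest>,
        ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>,
        Error,
    > for CounterEngine
{
    async fn generate(
        &self,
        request: SingleIn<NvCreateChatCompletionRequest>,
    ) -> Result<ManyOut<Annotated<NvCreateChatCompletionStreamResponse>>, Error> {
        // Hypothetical body: an empty stream tied to the request's context.
        let context = request.context();
        Ok(ResponseStream::new(Box::pin(futures::stream::empty()), context))
    }
}
```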
@@ -13,10 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
 use tempfile::tempdir;
-use triton_distributed_llm::model_card::model::{
-    ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind,
-};
 const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
...
@@ -14,8 +14,8 @@
 // limitations under the License.
 use async_openai::types::CreateCompletionRequestArgs;
+use dynemo_llm::protocols::openai::{self, completions::CompletionRequest};
 use serde::{Deserialize, Serialize};
-use triton_distributed_llm::protocols::openai::{self, completions::CompletionRequest};
 #[derive(Serialize, Deserialize, Debug, Clone)]
 struct CompletionSample {
...
@@ -15,10 +15,10 @@
 use anyhow::Ok;
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
+use dynemo_llm::preprocessor::prompt::PromptFormatter;
+use dynemo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
 use serde::{Deserialize, Serialize};
-use triton_distributed_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
-use triton_distributed_llm::preprocessor::prompt::PromptFormatter;
-use triton_distributed_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
 use hf_hub::{api::tokio::ApiBuilder, Cache, Repo, RepoType};
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 expression: request
 ---
 {
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: "should have prompt, model, and logit_bias fields with the logits_bias having two key/value pairs"
 expression: sample.request
 ---
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: "should have prompt, model, and temperature fields"
 expression: sample.request
 ---
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: "should have prompt, model, and top_p fields"
 expression: sample.request
 ---
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: "should have prompt, model, and stop fields"
 expression: sample.request
 ---
...
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: "should have prompt, model, and stream fields"
 expression: sample.request
 ---
...