Commit 1af7433b authored by Neelay Shah, committed by GitHub

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
@@ -35,11 +35,11 @@ use tracing;
use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
use crate::preprocessor::prompt::OAIChatLikeRequest;
-use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use triton_distributed_runtime::pipeline::{
+use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use dynemo_runtime::pipeline::{
async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
};
-use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
+use dynemo_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
use crate::protocols::{
common::{SamplingOptionsProvider, StopConditionsProvider},
......
@@ -13,10 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-//! # Triton LLM Protocols
+//! # Dynemo LLM Protocols
//!
//! This module contains the protocols, i.e. messages formats, used to exchange requests and responses
-//! both publicly via the HTTP API and internally between Triton components.
+//! both publicly via the HTTP API and internally between Dynemo components.
//!
use futures::{Stream, StreamExt};
@@ -33,10 +33,10 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// TODO: This is an awkward dependency that we need to address
// Originally, all the Annotated/SSE Codec bits were in the LLM protocol module; however, [Annotated]
-// has become the common response envelope for triton-distributed.
+// has become the common response envelope for dynemo-distributed.
// We may want to move the original Annotated back here and have an Infallible conversion to the
-// ResponseEnvelop in triton-distributed.
-pub use triton_distributed_runtime::protocols::annotated::Annotated;
+// ResponseEnvelop in dynemo-distributed.
+pub use dynemo_runtime::protocols::annotated::Annotated;
/// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract
......
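The TODO above explains why `Annotated` now lives in `dynemo_runtime` and is only re-exported here: it has become the common response envelope shared by every component. As a rough sketch of that envelope pattern — a local stand-in with hypothetical field names, since the real `dynemo_runtime` type's fields are not visible in this diff — the idea is to pair an optional payload with optional out-of-band metadata so each stream item can carry either data or control information:

```rust
/// Conceptual stand-in for a response envelope; the name and fields are
/// assumptions, not the actual dynemo_runtime definition.
#[derive(Debug, Clone)]
pub struct EnvelopeSketch<T> {
    /// The actual response chunk, if this stream item carries data.
    pub data: Option<T>,
    /// An SSE-style event name, e.g. an error or control event.
    pub event: Option<String>,
    /// Out-of-band annotations attached to this item.
    pub comments: Option<Vec<String>>,
}

impl<T> EnvelopeSketch<T> {
    /// Wrap a plain payload with no extra metadata.
    pub fn from_data(data: T) -> Self {
        Self { data: Some(data), event: None, comments: None }
    }
}
```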
@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
use super::{SamplingOptions, StopConditions};
use crate::protocols::TokenIdType;
-/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`triton-llm-preprocessor`]
+/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`dynemo-llm-preprocessor`]
/// crate is responsible for converting request from the public APIs to this internal representation.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
......
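The doc comment above describes the preprocessing boundary: public API requests are converted into this internal, token-level form. The sketch below only illustrates that direction of conversion; the real `PreprocessedRequest` fields are elided in this diff, so every `*Sketch` type, every field name, and the `u32` choice for `TokenIdType` are assumptions made for the example.

```rust
type TokenIdType = u32; // assumption: the diff only shows that a TokenIdType alias exists

#[derive(Debug, Clone, Default)]
struct StopConditionsSketch {
    max_tokens: Option<u32>,
    stop: Vec<String>,
}

#[derive(Debug, Clone, Default)]
struct SamplingOptionsSketch {
    temperature: Option<f32>,
    top_p: Option<f32>,
}

/// Hypothetical internal request: the prompt is already tokenized and the
/// public-API knobs are normalized into stop/sampling structures.
#[derive(Debug, Clone)]
struct PreprocessedRequestSketch {
    token_ids: Vec<TokenIdType>,
    stop_conditions: StopConditionsSketch,
    sampling_options: SamplingOptionsSketch,
}

/// Illustrative preprocessor step: tokenize the prompt and carry the
/// normalized options forward into the internal representation.
fn preprocess(
    prompt: &str,
    tokenize: impl Fn(&str) -> Vec<TokenIdType>,
) -> PreprocessedRequestSketch {
    PreprocessedRequestSketch {
        token_ids: tokenize(prompt),
        stop_conditions: StopConditionsSketch::default(),
        sampling_options: SamplingOptionsSketch::default(),
    }
}

fn main() {
    // Toy tokenizer: one id per whitespace-separated word.
    let req = preprocess("hello world", |s| s.split_whitespace().map(|_| 0).collect());
    assert_eq!(req.token_ids.len(), 2);
}
```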
@@ -13,7 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
pub mod chat_completions;
pub mod completions;
pub mod models;
......
@@ -17,8 +17,8 @@ use super::nvext::NvExt;
use super::nvext::NvExtProvider;
use super::OpenAISamplingOptionsProvider;
use super::OpenAIStopConditionsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
use validator::Validate;
mod aggregator;
......
@@ -31,7 +31,7 @@ use super::{
CompletionUsage, ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
};
-use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
+use dynemo_runtime::protocols::annotated::AnnotationsProvider;
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct CompletionRequest {
......
@@ -13,7 +13,7 @@
#### HuggingFace Tokenizer
```rust
-use triton_distributed_llm::tokenizers::hf::HuggingFaceTokenizer;
+use dynemo_llm::tokenizers::hf::HuggingFaceTokenizer;
let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
.expect("Failed to load HuggingFace tokenizer");
@@ -22,7 +22,7 @@ let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/Tin
### Encoding and Decoding Text
```rust
-use triton_distributed_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
+use dynemo_llm::tokenizers::{HuggingFaceTokenizer, traits::{Encoder, Decoder}};
let tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/TinyLlama_v1.1/tokenizer.json")
.expect("Failed to load HuggingFace tokenizer");
@@ -40,7 +40,7 @@ assert_eq!(text, decoded_text);
// Using the Sequence object for encoding and decoding
-use triton_distributed_llm::tokenizers::{Sequence, Tokenizer};
+use dynemo_llm::tokenizers::{Sequence, Tokenizer};
use std::sync::{Arc, RwLock};
let tokenizer = Tokenizer::from(Arc::new(tokenizer));
@@ -51,4 +51,4 @@ sequence.append_text("Your sample text here")
let delta = sequence.append_token_id(1337)
.expect("Failed to append token_id");
-```
\ No newline at end of file
+```
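The `Sequence` example above relies on incremental detokenization: each appended token id yields only the newly produced text (the delta). The self-contained sketch below illustrates that idea without depending on the dynemo-llm API; the `SequenceSketch` type and its toy vocabulary are assumptions made purely for illustration.

```rust
use std::collections::HashMap;

/// Toy incremental decoder mirroring the delta-based `Sequence` usage above:
/// appending a token id returns only the text produced by that token.
struct SequenceSketch {
    vocab: HashMap<u32, String>, // stand-in for a real tokenizer vocabulary
    decoded: String,             // everything decoded so far
}

impl SequenceSketch {
    fn append_token_id(&mut self, id: u32) -> Option<String> {
        let piece = self.vocab.get(&id)?.clone();
        self.decoded.push_str(&piece);
        Some(piece) // the delta, analogous to `delta` in the docs above
    }
}

fn main() {
    let vocab = HashMap::from([(1336_u32, "Hello".to_string()), (1337, " world".to_string())]);
    let mut seq = SequenceSketch { vocab, decoded: String::new() };
    assert_eq!(seq.append_token_id(1336).as_deref(), Some("Hello"));
    assert_eq!(seq.append_token_id(1337).as_deref(), Some(" world"));
    assert_eq!(seq.decoded, "Hello world");
}
```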
@@ -19,7 +19,7 @@ pub use protocols::{Annotated, TokenIdType};
pub mod openai {
use super::*;
-use triton_distributed_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
+use dynemo_runtime::pipeline::{ServerStreamingEngine, UnaryEngine};
pub mod completions {
use super::*;
......
@@ -13,12 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use futures::StreamExt;
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
codec::{create_message_stream, Message, SseCodecError},
openai::{chat_completions::NvCreateChatCompletionResponse, completions::CompletionResponse},
ContentProvider, DataStream,
};
+use futures::StreamExt;
const CMPL_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/completions";
const CHAT_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions";
......
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use triton_distributed_llm::backend::Backend;
-use triton_distributed_llm::model_card::model::ModelDeploymentCard;
+use dynemo_llm::backend::Backend;
+use dynemo_llm::model_card::model::ModelDeploymentCard;
#[tokio::test]
async fn test_sequence_factory() {
......
@@ -15,28 +15,28 @@
use anyhow::Error;
use async_stream::stream;
-use prometheus::{proto::MetricType, Registry};
-use reqwest::StatusCode;
-use std::sync::Arc;
-use triton_distributed_llm::http::service::{
+use dynemo_llm::http::service::{
error::HttpError,
metrics::{Endpoint, RequestType, Status},
service_v2::HttpService,
Metrics,
};
-use triton_distributed_llm::protocols::{
+use dynemo_llm::protocols::{
openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse},
completions::{CompletionRequest, CompletionResponse},
},
Annotated,
};
-use triton_distributed_runtime::{
+use dynemo_runtime::{
pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, SingleIn,
},
CancellationToken,
};
+use prometheus::{proto::MetricType, Registry};
+use reqwest::StatusCode;
+use std::sync::Arc;
struct CounterEngine {}
......
@@ -13,10 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
use tempfile::tempdir;
-use triton_distributed_llm::model_card::model::{
-    ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind,
-};
const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
......
@@ -14,8 +14,8 @@
// limitations under the License.
use async_openai::types::CreateCompletionRequestArgs;
+use dynemo_llm::protocols::openai::{self, completions::CompletionRequest};
use serde::{Deserialize, Serialize};
-use triton_distributed_llm::protocols::openai::{self, completions::CompletionRequest};
#[derive(Serialize, Deserialize, Debug, Clone)]
struct CompletionSample {
......
@@ -15,10 +15,10 @@
use anyhow::Ok;
+use dynemo_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
+use dynemo_llm::preprocessor::prompt::PromptFormatter;
+use dynemo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use serde::{Deserialize, Serialize};
-use triton_distributed_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
-use triton_distributed_llm::preprocessor::prompt::PromptFormatter;
-use triton_distributed_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use hf_hub::{api::tokio::ApiBuilder, Cache, Repo, RepoType};
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
expression: request
---
{
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and logit_bias fields with the logits_bias having two key/value pairs"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and temperature fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and top_p fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and stop fields"
expression: sample.request
---
......
---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
description: "should have prompt, model, and stream fields"
expression: sample.request
---
......
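The snapshot descriptions above list which fields each captured completion request is expected to contain. As a rough illustration of the shape being asserted — the actual snapshot bodies are elided here, the field values below are invented, and the model name is only inferred from the replay path — the temperature variant would serialize to JSON along these lines:

```rust
use serde_json::json;

fn main() {
    // Shape only: the real snapshots record the exact requests replayed by the tests.
    let sample = json!({
        "model": "meta/llama-3.1-8b-instruct",
        "prompt": "Your sample text here",
        "temperature": 0.7
    });
    println!("{}", serde_json::to_string_pretty(&sample).unwrap());
}
```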