"vscode:/vscode.git/clone" did not exist on "562c7f51bd3964e05995c8c36b4b271e5361c9fa"
Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub
Browse files

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
...@@ -37,7 +37,7 @@ use std::time::Duration; ...@@ -37,7 +37,7 @@ use std::time::Duration;
use derive_builder::Builder; use derive_builder::Builder;
use triton_distributed::slug::Slug; use triton_distributed_runtime::slug::Slug;
pub const BUCKET_NAME: &str = "mdc"; pub const BUCKET_NAME: &str = "mdc";
......
...@@ -35,11 +35,11 @@ use tracing; ...@@ -35,11 +35,11 @@ use tracing;
use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind}; use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
use crate::preprocessor::prompt::OAIChatLikeRequest; use crate::preprocessor::prompt::OAIChatLikeRequest;
use triton_distributed::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed::pipeline::{ use triton_distributed_runtime::pipeline::{
async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn, async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
}; };
use triton_distributed::protocols::annotated::{Annotated, AnnotationsProvider}; use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
use crate::protocols::{ use crate::protocols::{
common::{SamplingOptionsProvider, StopConditionsProvider}, common::{SamplingOptionsProvider, StopConditionsProvider},
......
...@@ -36,7 +36,7 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>; ...@@ -36,7 +36,7 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// has become the common response envelope for triton-distributed. // has become the common response envelope for triton-distributed.
// We may want to move the original Annotated back here and have an Infallible conversion to the // We may want to move the original Annotated back here and have an Infallible conversion to the
// ResponseEnvelop in triton-distributed. // ResponseEnvelop in triton-distributed.
pub use triton_distributed::protocols::annotated::Annotated; pub use triton_distributed_runtime::protocols::annotated::Annotated;
/// The LLM responses have multiple different fields and nests of objects to get to the actual /// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract /// text completion returned. This trait can be applied to the `choice` level objects to extract
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
/// Forward openai_api_rs::v1 to triton_llm::protocols::openai::v1 /// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
pub mod chat_completions; pub mod chat_completions;
pub mod completions; pub mod completions;
pub mod models; pub mod models;
......
...@@ -40,7 +40,8 @@ use super::{ ...@@ -40,7 +40,8 @@ use super::{
validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider, validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider,
OpenAIStopConditionsProvider, OpenAIStopConditionsProvider,
}; };
use triton_distributed::protocols::annotated::AnnotationsProvider;
use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
/// Request object which is used to generate chat completions. /// Request object which is used to generate chat completions.
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)] #[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]
...@@ -187,7 +188,7 @@ pub struct ChatCompletionRequest { ...@@ -187,7 +188,7 @@ pub struct ChatCompletionRequest {
/// in the vocabulary of the model. /// in the vocabulary of the model.
/// ///
/// ``` /// ```
/// use triton_llm::protocols::openai::completions::CompletionRequest; /// use triton_distributed_llm::protocols::openai::completions::CompletionRequest;
/// ///
/// let request = CompletionRequest::builder() /// let request = CompletionRequest::builder()
/// .prompt("What is the meaning of life?") /// .prompt("What is the meaning of life?")
...@@ -271,7 +272,7 @@ impl ChatCompletionRequestBuilder { ...@@ -271,7 +272,7 @@ impl ChatCompletionRequestBuilder {
/// Builds and validates the ChatCompletionRequest /// Builds and validates the ChatCompletionRequest
/// ///
/// ```rust /// ```rust
/// use triton_llm::protocols::openai::chat_completions::ChatCompletionRequest; /// use triton_distributed_llm::protocols::openai::chat_completions::ChatCompletionRequest;
/// ///
/// let request = ChatCompletionRequest::builder() /// let request = ChatCompletionRequest::builder()
/// .model("mixtral-8x7b-instruct-v0.1") /// .model("mixtral-8x7b-instruct-v0.1")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment