Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
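
The hunks below are a mechanical crate rename: `triton_distributed` becomes `triton_distributed_runtime`, and `triton_llm` becomes `triton_distributed_llm`. As a minimal before/after sketch of what this means for imports (paths taken directly from the hunks; no API changes are implied beyond the new crate names):

```rust
// Old crate names (before this commit):
//   use triton_distributed::slug::Slug;
//   use triton_llm::protocols::openai::completions::CompletionRequest;
//
// New crate names (after this commit):
use triton_distributed_runtime::slug::Slug;
use triton_distributed_llm::protocols::openai::completions::CompletionRequest;
```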
@@ -37,7 +37,7 @@ use std::time::Duration;
 use derive_builder::Builder;
-use triton_distributed::slug::Slug;
+use triton_distributed_runtime::slug::Slug;
 pub const BUCKET_NAME: &str = "mdc";

@@ -35,11 +35,11 @@ use tracing;
 use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
 use crate::preprocessor::prompt::OAIChatLikeRequest;
-use triton_distributed::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use triton_distributed::pipeline::{
+use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use triton_distributed_runtime::pipeline::{
     async_trait, AsyncEngineContext, Error, ManyOut, Operator, SingleIn,
 };
-use triton_distributed::protocols::annotated::{Annotated, AnnotationsProvider};
+use triton_distributed_runtime::protocols::annotated::{Annotated, AnnotationsProvider};
 use crate::protocols::{
     common::{SamplingOptionsProvider, StopConditionsProvider},

@@ -36,7 +36,7 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
 // has become the common response envelope for triton-distributed.
 // We may want to move the original Annotated back here and has a Infallible conversion to the the
 // ResponseEnvelop in triton-distributed.
-pub use triton_distributed::protocols::annotated::Annotated;
+pub use triton_distributed_runtime::protocols::annotated::Annotated;
 /// The LLM responses have multiple different fields and nests of objects to get to the actual
 /// text completion returned. This trait can be applied to the `choice` level objects to extract

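Since the changed line is a `pub use` re-export, downstream code that imports `Annotated` through the LLM crate keeps compiling unchanged; only the source crate behind the re-export moves. A hedged illustration (the `protocols` module location of this file is an assumption, not something the diff confirms):

```rust
// Both paths name the same type after this commit: the first resolves
// through the `pub use` re-export above, the second goes to the runtime
// crate directly. The module path on the first line is assumed.
use triton_distributed_llm::protocols::Annotated;
// use triton_distributed_runtime::protocols::annotated::Annotated; // equivalent
```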
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-/// Forward openai_api_rs::v1 to triton_llm::protocols::openai::v1
+/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
 pub mod chat_completions;
 pub mod completions;
 pub mod models;

@@ -40,7 +40,8 @@ use super::{
     validate_logit_bias, ContentProvider, OpenAISamplingOptionsProvider,
     OpenAIStopConditionsProvider,
 };
-use triton_distributed::protocols::annotated::AnnotationsProvider;
+use triton_distributed_runtime::protocols::annotated::AnnotationsProvider;
 /// Request object which is used to generate chat completions.
 #[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]

@@ -187,7 +188,7 @@ pub struct ChatCompletionRequest {
 /// in the vocabulary of the model.
 ///
 /// ```
-/// use triton_llm::protocols::openai::completions::CompletionRequest;
+/// use triton_distributed_llm::protocols::openai::completions::CompletionRequest;
 ///
 /// let request = CompletionRequest::builder()
 ///     .prompt("What is the meaning of life?")

@@ -271,7 +272,7 @@ impl ChatCompletionRequestBuilder {
 /// Builds and validates the ChatCompletionRequest
 ///
 /// ```rust
-/// use triton_llm::protocols::openai::chat_completions::ChatCompletionRequest;
+/// use triton_distributed_llm::protocols::openai::chat_completions::ChatCompletionRequest;
 ///
 /// let request = ChatCompletionRequest::builder()
 ///     .model("mixtral-8x7b-instruct-v0.1")

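The renamed doctest paths above exercise the builders generated by `#[derive(Builder)]` (derive_builder is imported in the first hunk). As a rough sketch of the full call shape under the new crate name, assuming derive_builder's fallible `build()` and that the fields the truncated doctests omit have defaults:

```rust
use triton_distributed_llm::protocols::openai::chat_completions::ChatCompletionRequest;

fn main() {
    // `model` and its value come from the doctest in the hunk above; whether
    // `build()` succeeds without setting further fields is an assumption
    // about the builder's defaults and validation.
    let request = ChatCompletionRequest::builder()
        .model("mixtral-8x7b-instruct-v0.1")
        .build()
        .expect("request should pass builder validation");

    // Debug is derived on the struct (see the #[derive(...)] in the hunk
    // touching AnnotationsProvider), so the whole request can be printed.
    println!("{request:?}");
}
```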