Commit ffc6dde1 authored by Ryan Olson's avatar Ryan Olson Committed by GitHub
Browse files

feat: OpenAI compatible http service (#123)


Signed-off-by: Ryan Olson <ryanolson@users.noreply.github.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent 9d6643b7
---
source: triton-llm/tests/openai_completions.rs
description: "should have prompt, model, and echo fields"
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?",
"echo": true
}
---
source: triton-llm/tests/openai_completions.rs
description: "should have prompt, model, and stream fields"
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?",
"stream": true
}
---
source: triton-llm/tests/openai_completions.rs
description: should have only prompt and model fields
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?"
}
......@@ -2326,7 +2326,7 @@ dependencies = [
[[package]]
name = "triton-distributed"
version = "0.1.2"
version = "0.1.3"
dependencies = [
"anyhow",
"assert_matches",
......
......@@ -17,7 +17,7 @@
name = "triton-distributed"
description = "Distributed GenAI Inference Framework"
readme = "README.md"
version = "0.1.2" # TODO: Centralize Version Automation
version = "0.1.3" # TODO: Centralize Version Automation
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
......
......@@ -15,7 +15,7 @@
[package]
name = "hello_world"
version = "0.1.0"
version = "0.1.3"
edition = "2021"
[dependencies]
......
......@@ -1433,9 +1433,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.20.2"
version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]]
name = "openssl-probe"
......@@ -2594,7 +2594,7 @@ dependencies = [
[[package]]
name = "triton-distributed"
version = "0.1.2"
version = "0.1.3"
dependencies = [
"anyhow",
"async-nats",
......@@ -2632,7 +2632,7 @@ dependencies = [
[[package]]
name = "triton_distributed_py3"
version = "0.1.1"
version = "0.1.3"
dependencies = [
"futures",
"once_cell",
......
......@@ -15,7 +15,7 @@
[package]
name = "triton_distributed_py3"
version = "0.1.1"
version = "0.1.3"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
......@@ -30,10 +30,10 @@ crate-type = ["cdylib"]
[dependencies]
triton-distributed = { version = "0.1.1", path = "../" }
triton-distributed = { version = "0.1.3", path = "../" }
futures = "0.3"
once_cell = "1"
once_cell = "1.20.3"
serde = "1"
serde_json = "1.0.138"
tokio = { version = "1", features = ["full"] }
......
......@@ -16,7 +16,7 @@
[project]
name = "triton-distributed-rs"
version = "0.1.1"
version = "0.1.3"
description = "Distributed LLM Framework"
# readme = "README.md"
authors = [
......
[toolchain]
channel = "1.84.1"
......@@ -17,7 +17,7 @@
//! There are two context objects defined in this module:
//!
//! - [`Context`] is an input context which is propagated through the processing pipeline,
//! up to the point where the input is pass to an [`nim_llm_async_engine::AsyncEngine`] for processing.
//! up to the point where the input is passed to a [`triton_distributed::engine::AsyncEngine`] for processing.
//! - [`StreamContext`] is the input context transformed into a type-erased context that maintains the inputs
//! registry and visitors. `StreamAdaptors` will amend themselves to the [`StreamContext`] to allow for the
......@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext {
}
}
// TODO - refactor here - this came from the nim-llm-async-engine crate
// TODO - refactor here - this came from the triton-llm-async-engine crate
use tokio::sync::watch::{channel, Receiver, Sender};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment