"examples/vscode:/vscode.git/clone" did not exist on "7dca64df52f3e89c65681bfeff292e31bdf5062c"
Commit ffc6dde1 authored by Ryan Olson, committed by GitHub
Browse files

feat: OpenAI compatible http service (#123)


Signed-off-by: Ryan Olson <ryanolson@users.noreply.github.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
Co-authored-by: Neelay Shah <neelays@nvidia.com>
parent 9d6643b7
---
source: triton-llm/tests/openai_completions.rs
description: "should have prompt, model, and echo fields"
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?",
"echo": true
}
---
source: triton-llm/tests/openai_completions.rs
description: "should have prompt, model, and stream fields"
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?",
"stream": true
}
---
source: triton-llm/tests/openai_completions.rs
description: should have only prompt and model fields
expression: sample.request
---
{
"model": "gpt-3.5-turbo",
"prompt": "What is the meaning of life?"
}
...@@ -2326,7 +2326,7 @@ dependencies = [ ...@@ -2326,7 +2326,7 @@ dependencies = [
[[package]] [[package]]
name = "triton-distributed" name = "triton-distributed"
version = "0.1.2" version = "0.1.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"assert_matches", "assert_matches",
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
name = "triton-distributed" name = "triton-distributed"
description = "Distributed GenAI Inference Framework" description = "Distributed GenAI Inference Framework"
readme = "README.md" readme = "README.md"
version = "0.1.2" # TODO: Centralize Version Automation version = "0.1.3" # TODO: Centralize Version Automation
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
[package] [package]
name = "hello_world" name = "hello_world"
version = "0.1.0" version = "0.1.3"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
......
...@@ -1433,9 +1433,9 @@ dependencies = [ ...@@ -1433,9 +1433,9 @@ dependencies = [
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.20.2" version = "1.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
[[package]] [[package]]
name = "openssl-probe" name = "openssl-probe"
...@@ -2594,7 +2594,7 @@ dependencies = [ ...@@ -2594,7 +2594,7 @@ dependencies = [
[[package]] [[package]]
name = "triton-distributed" name = "triton-distributed"
version = "0.1.2" version = "0.1.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-nats", "async-nats",
...@@ -2632,7 +2632,7 @@ dependencies = [ ...@@ -2632,7 +2632,7 @@ dependencies = [
[[package]] [[package]]
name = "triton_distributed_py3" name = "triton_distributed_py3"
version = "0.1.1" version = "0.1.3"
dependencies = [ dependencies = [
"futures", "futures",
"once_cell", "once_cell",
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
[package] [package]
name = "triton_distributed_py3" name = "triton_distributed_py3"
version = "0.1.1" version = "0.1.3"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
...@@ -30,10 +30,10 @@ crate-type = ["cdylib"] ...@@ -30,10 +30,10 @@ crate-type = ["cdylib"]
[dependencies] [dependencies]
triton-distributed = { version = "0.1.1", path = "../" } triton-distributed = { version = "0.1.3", path = "../" }
futures = "0.3" futures = "0.3"
once_cell = "1" once_cell = "1.20.3"
serde = "1" serde = "1"
serde_json = "1.0.138" serde_json = "1.0.138"
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
[project] [project]
name = "triton-distributed-rs" name = "triton-distributed-rs"
version = "0.1.1" version = "0.1.3"
description = "Distributed LLM Framework" description = "Distributed LLM Framework"
# readme = "README.md" # readme = "README.md"
authors = [ authors = [
......
[toolchain]
channel = "1.84.1"
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
//! There are two context objects defined in this module: //! There are two context objects defined in this module:
//! //!
//! - [`Context`] is an input context which is propagated through the processing pipeline, //! - [`Context`] is an input context which is propagated through the processing pipeline,
//! up to the point where the input is passed to an [`nim_llm_async_engine::AsyncEngine`] for processing. //! up to the point where the input is passed to an [`triton_distributed::engine::AsyncEngine`] for processing.
//! - [`StreamContext`] is the input context transformed into a type-erased context that maintains the inputs //! - [`StreamContext`] is the input context transformed into a type-erased context that maintains the inputs
//! registry and visitors. `StreamAdaptors` will amend themselves to the [`StreamContext`] to allow for the //! registry and visitors. `StreamAdaptors` will amend themselves to the [`StreamContext`] to allow for the
...@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext { ...@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext {
} }
} }
// TODO - refactor here - this came from the nim-llm-async-engine crate // TODO - refactor here - this came from the triton-llm-async-engine crate
use tokio::sync::watch::{channel, Receiver, Sender}; use tokio::sync::watch::{channel, Receiver, Sender};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment