Commit 1af7433b authored by Neelay Shah, committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
......@@ -17,12 +17,12 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
await init(runtime, "triton-init")
await init(runtime, "dynemo")
async def init(runtime: DistributedRuntime, ns: str):
......
......@@ -21,7 +21,7 @@ import uvloop
from client import init as client_init
from server import init as server_init
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
def random_string(length=10):
......@@ -29,7 +29,7 @@ def random_string(length=10):
return "".join(random.choices(chars, k=length))
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
ns = random_string()
task = asyncio.create_task(server_init(runtime, ns))
......
......@@ -17,7 +17,7 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
class RequestHandler:
......@@ -31,9 +31,9 @@ class RequestHandler:
yield char
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
await init(runtime, "triton-init")
await init(runtime, "dynemo")
async def init(runtime: DistributedRuntime, ns: str):
......
......@@ -17,7 +17,7 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install()
......@@ -29,7 +29,7 @@ class RequestHandler:
yield char
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
component = runtime.namespace("examples/pipeline").component("backend")
await component.create_service()
......
......@@ -17,7 +17,7 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install()
......@@ -32,7 +32,7 @@ class RequestHandler:
yield output.get("data")
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
# client to the next component - in this case the middle component
next = (
......
......@@ -17,7 +17,7 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install()
......@@ -32,7 +32,7 @@ class RequestHandler:
yield output.get("data")
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
# client to backend
backend = (
......
......@@ -17,12 +17,12 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install()
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
"""
# Pipeline Example
......
......@@ -17,18 +17,16 @@ import asyncio
from protocol import Request
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
"""
Instantiate a `backend` client and call the `generate` endpoint
"""
# get endpoint
endpoint = (
runtime.namespace("triton-init").component("backend").endpoint("generate")
)
endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
# create client
client = await endpoint.client()
......
......@@ -19,11 +19,7 @@ import asyncio
import uvloop
from protocol import Request, Response
from triton_distributed.runtime import (
DistributedRuntime,
triton_endpoint,
triton_worker,
)
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
uvloop.install()
......@@ -33,19 +29,19 @@ class RequestHandler:
Request handler for the generate endpoint
"""
@triton_endpoint(Request, Response)
@dynemo_endpoint(Request, Response)
async def generate(self, request):
for char in request.data:
yield char
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
"""
Instantiate a `backend` component and serve the `generate` endpoint
A `Component` can serve multiple endpoints
"""
component = runtime.namespace("triton-init").component("backend")
component = runtime.namespace("dynemo").component("backend")
await component.create_service()
endpoint = component.endpoint("generate")
......
......@@ -15,8 +15,7 @@
use std::sync::Arc;
pub use serde::{Deserialize, Serialize};
pub use triton_distributed_runtime::{
pub use dynemo_runtime::{
error,
pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
......@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{
protocols::annotated::Annotated,
Error, Result,
};
pub use serde::{Deserialize, Serialize};
use pyo3::prelude::*;
use pyo3_async_runtimes::TaskLocals;
......@@ -192,7 +192,6 @@ where
// tell the python async generator to stop generating
// right now, this is impossible as we are not passing the context to the python async generator
// todo: add task-local context to the python async generator
// see: https://github.com/triton-inference-server/triton_distributed/issues/130
ctx.stop_generating();
let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e);
tracing::error!(request_id, "{}", msg);
......
......@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc};
use tokio::sync::Mutex;
use tracing_subscriber::FmtSubscriber;
use triton_distributed_runtime::{
use dynemo_runtime::{
self as rs,
pipeline::{EngineStream, ManyOut, SingleIn},
protocols::annotated::Annotated as RsAnnotated,
traits::DistributedRuntimeProvider,
};
use triton_distributed_llm::{self as llm_rs};
use dynemo_llm::{self as llm_rs};
mod engine;
mod llm;
......
......@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard;
use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput};
use llm_rs::types::Annotated;
use triton_distributed_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
use dynemo_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
use crate::engine::PythonAsyncEngine;
......
......@@ -27,9 +27,9 @@ use llm_rs::{
},
};
use triton_distributed_runtime::pipeline::{Operator, ServiceFrontend, Source};
use dynemo_runtime::pipeline::{Operator, ServiceFrontend, Source};
use triton_distributed_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
use dynemo_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
#[pyclass]
pub(crate) struct OAIChatPreprocessor {
......
......@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]]
class DistributedRuntime:
"""
The runtime object for a distributed NOVA applications
The runtime object for dynemo applications
"""
...
......
......@@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from triton_distributed._core import KvMetricsPublisher as KvMetricsPublisher
from triton_distributed._core import KvRouter as KvRouter
from dynemo._core import KvMetricsPublisher as KvMetricsPublisher
from dynemo._core import KvRouter as KvRouter
......@@ -22,15 +22,15 @@ from pydantic import BaseModel, ValidationError
# List all the classes in the _core module for re-export
# import * causes "unable to detect undefined names"
from triton_distributed._core import Backend as Backend
from triton_distributed._core import Client as Client
from triton_distributed._core import DistributedRuntime as DistributedRuntime
from triton_distributed._core import KvRouter as KvRouter
from triton_distributed._core import ModelDeploymentCard as ModelDeploymentCard
from triton_distributed._core import OAIChatPreprocessor as OAIChatPreprocessor
from dynemo._core import Backend as Backend
from dynemo._core import Client as Client
from dynemo._core import DistributedRuntime as DistributedRuntime
from dynemo._core import KvRouter as KvRouter
from dynemo._core import ModelDeploymentCard as ModelDeploymentCard
from dynemo._core import OAIChatPreprocessor as OAIChatPreprocessor
def triton_worker():
def dynemo_worker():
def decorator(func):
@wraps(func)
async def wrapper(*args, **kwargs):
......@@ -59,7 +59,7 @@ def triton_worker():
return decorator
def triton_endpoint(
def dynemo_endpoint(
request_model: Union[Type[BaseModel], Type[Any]], response_model: Type[BaseModel]
) -> Callable:
def decorator(
......
......@@ -19,7 +19,7 @@ import string
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
# Soak Test
#
......@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker
# could still eventually be a problem.
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
ns = random_string()
task = asyncio.create_task(server_init(runtime, ns))
......
......@@ -20,7 +20,7 @@ pytestmark = pytest.mark.pre_merge
def test_bindings_install():
# Verify python bindings to rust can be imported
import triton_distributed.runtime as tdr
import dynemo.runtime as tdr
# Placeholder to avoid unused import errors or removal by linters
assert tdr
......@@ -15,7 +15,7 @@
import asyncio
from triton_distributed._core import DistributedRuntime
from dynemo._core import DistributedRuntime
async def test_simple_put_get():
......
......@@ -1387,6 +1387,109 @@ dependencies = [
"reborrow",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"hf-hub 0.4.1",
"indexmap 2.7.1",
"insta",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"proptest",
"pyo3",
"regex",
"reqwest",
"rstest",
"semver",
"sentencepiece",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"tempfile",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.29",
"toktrie_hf_tokenizers 0.6.29",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
......@@ -5747,109 +5850,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"hf-hub 0.4.1",
"indexmap 2.7.1",
"insta",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"proptest",
"pyo3",
"regex",
"reqwest",
"rstest",
"semver",
"sentencepiece",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"tempfile",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.29",
"toktrie_hf_tokenizers 0.6.29",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment