Commit 1af7433b authored by Neelay Shah, committed by GitHub

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
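
For orientation: the Python package `triton_distributed` becomes `dynemo`, the `triton_worker`/`triton_endpoint` decorators become `dynemo_worker`/`dynemo_endpoint`, the Rust crates `triton_distributed_runtime`/`triton_distributed_llm` become `dynemo_runtime`/`dynemo_llm`, and the example namespace string moves from "triton-init" to "dynemo". A minimal sketch of a worker entrypoint after the rename (not code from this commit; it only uses names that appear in the hunks below):

```python
# Old names removed by this commit:
#   from triton_distributed.runtime import DistributedRuntime, triton_worker
#   @triton_worker()
# New names added by this commit:
from dynemo.runtime import DistributedRuntime, dynemo_worker


@dynemo_worker()
async def worker(runtime: DistributedRuntime):
    # The examples also switch their namespace string from "triton-init" to "dynemo".
    namespace = runtime.namespace("dynemo")
```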
@@ -17,12 +17,12 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
-    await init(runtime, "triton-init")
+    await init(runtime, "dynemo")
 async def init(runtime: DistributedRuntime, ns: str):
......
@@ -21,7 +21,7 @@ import uvloop
 from client import init as client_init
 from server import init as server_init
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 def random_string(length=10):
@@ -29,7 +29,7 @@ def random_string(length=10):
     return "".join(random.choices(chars, k=length))
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     ns = random_string()
     task = asyncio.create_task(server_init(runtime, ns))
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 class RequestHandler:
@@ -31,9 +31,9 @@ class RequestHandler:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
-    await init(runtime, "triton-init")
+    await init(runtime, "dynemo")
 async def init(runtime: DistributedRuntime, ns: str):
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -29,7 +29,7 @@ class RequestHandler:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     component = runtime.namespace("examples/pipeline").component("backend")
     await component.create_service()
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -32,7 +32,7 @@ class RequestHandler:
             yield output.get("data")
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     # client to the next component - in this case the middle component
     next = (
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -32,7 +32,7 @@ class RequestHandler:
             yield output.get("data")
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     # client to backend
     backend = (
......
@@ -17,12 +17,12 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     # Pipeline Example
......
@@ -17,18 +17,16 @@ import asyncio
 from protocol import Request
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     Instantiate a `backend` client and call the `generate` endpoint
     """
     # get endpoint
-    endpoint = (
-        runtime.namespace("triton-init").component("backend").endpoint("generate")
-    )
+    endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
     # create client
     client = await endpoint.client()
......
@@ -19,11 +19,7 @@ import asyncio
 import uvloop
 from protocol import Request, Response
-from triton_distributed.runtime import (
-    DistributedRuntime,
-    triton_endpoint,
-    triton_worker,
-)
+from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
 uvloop.install()
@@ -33,19 +29,19 @@ class RequestHandler:
     Request handler for the generate endpoint
     """
-    @triton_endpoint(Request, Response)
+    @dynemo_endpoint(Request, Response)
     async def generate(self, request):
         for char in request.data:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     Instantiate a `backend` component and serve the `generate` endpoint
     A `Component` can serve multiple endpoints
     """
-    component = runtime.namespace("triton-init").component("backend")
+    component = runtime.namespace("dynemo").component("backend")
     await component.create_service()
     endpoint = component.endpoint("generate")
......
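
Taken together, the client and server hunks above imply the following end-to-end sketch under the new names. This is an assembled illustration rather than code from the commit; the real examples keep the client and server in separate files, and the parts the diff elides (how the handler is attached to the endpoint and how the client issues requests) are left as comments rather than guessed.

```python
import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
from protocol import Request, Response  # local module shipped with the examples

uvloop.install()


class RequestHandler:
    """
    Request handler for the generate endpoint
    """

    @dynemo_endpoint(Request, Response)
    async def generate(self, request):
        # Stream the request payload back one character at a time.
        for char in request.data:
            yield char


@dynemo_worker()
async def server(runtime: DistributedRuntime):
    # Register a `backend` component in the `dynemo` namespace and expose `generate`.
    component = runtime.namespace("dynemo").component("backend")
    await component.create_service()
    endpoint = component.endpoint("generate")
    # ...serving `RequestHandler().generate` on this endpoint is elided in the diff...


@dynemo_worker()
async def client(runtime: DistributedRuntime):
    # Look up the same endpoint from the client side and build a client for it.
    endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
    client = await endpoint.client()
    # ...issuing a request through `client` is elided in the diff...
```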
@@ -15,8 +15,7 @@
 use std::sync::Arc;
-pub use serde::{Deserialize, Serialize};
-pub use triton_distributed_runtime::{
+pub use dynemo_runtime::{
     error,
     pipeline::{
         async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{
     protocols::annotated::Annotated,
     Error, Result,
 };
+pub use serde::{Deserialize, Serialize};
 use pyo3::prelude::*;
 use pyo3_async_runtimes::TaskLocals;
@@ -192,7 +192,6 @@ where
             // tell the python async generator to stop generating
            // right now, this is impossible as we are not passing the context to the python async generator
            // todo: add task-local context to the python async generator
-            // see: https://github.com/triton-inference-server/triton_distributed/issues/130
             ctx.stop_generating();
             let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e);
             tracing::error!(request_id, "{}", msg);
......
@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc};
 use tokio::sync::Mutex;
 use tracing_subscriber::FmtSubscriber;
-use triton_distributed_runtime::{
+use dynemo_runtime::{
     self as rs,
     pipeline::{EngineStream, ManyOut, SingleIn},
     protocols::annotated::Annotated as RsAnnotated,
     traits::DistributedRuntimeProvider,
 };
-use triton_distributed_llm::{self as llm_rs};
+use dynemo_llm::{self as llm_rs};
 mod engine;
 mod llm;
......
@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard;
 use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput};
 use llm_rs::types::Annotated;
-use triton_distributed_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
+use dynemo_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
 use crate::engine::PythonAsyncEngine;
......
@@ -27,9 +27,9 @@ use llm_rs::{
     },
 };
-use triton_distributed_runtime::pipeline::{Operator, ServiceFrontend, Source};
+use dynemo_runtime::pipeline::{Operator, ServiceFrontend, Source};
-use triton_distributed_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
+use dynemo_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
 #[pyclass]
 pub(crate) struct OAIChatPreprocessor {
......
@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]]
 class DistributedRuntime:
     """
-    The runtime object for a distributed NOVA applications
+    The runtime object for dynemo applications
     """
     ...
......
@@ -13,5 +13,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from triton_distributed._core import KvMetricsPublisher as KvMetricsPublisher
-from triton_distributed._core import KvRouter as KvRouter
+from dynemo._core import KvMetricsPublisher as KvMetricsPublisher
+from dynemo._core import KvRouter as KvRouter
@@ -19,7 +19,7 @@ import string
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 # Soak Test
 #
@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker
 # could still eventually be a problem.
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     ns = random_string()
     task = asyncio.create_task(server_init(runtime, ns))
......