Commit 1af7433b authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: default avatarGraham King <grahamk@nvidia.com>
parent ee4ef06b
...@@ -17,12 +17,12 @@ import asyncio ...@@ -17,12 +17,12 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
await init(runtime, "triton-init") await init(runtime, "dynemo")
async def init(runtime: DistributedRuntime, ns: str): async def init(runtime: DistributedRuntime, ns: str):
......
...@@ -21,7 +21,7 @@ import uvloop ...@@ -21,7 +21,7 @@ import uvloop
from client import init as client_init from client import init as client_init
from server import init as server_init from server import init as server_init
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
def random_string(length=10): def random_string(length=10):
...@@ -29,7 +29,7 @@ def random_string(length=10): ...@@ -29,7 +29,7 @@ def random_string(length=10):
return "".join(random.choices(chars, k=length)) return "".join(random.choices(chars, k=length))
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
ns = random_string() ns = random_string()
task = asyncio.create_task(server_init(runtime, ns)) task = asyncio.create_task(server_init(runtime, ns))
......
...@@ -17,7 +17,7 @@ import asyncio ...@@ -17,7 +17,7 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
class RequestHandler: class RequestHandler:
...@@ -31,9 +31,9 @@ class RequestHandler: ...@@ -31,9 +31,9 @@ class RequestHandler:
yield char yield char
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
await init(runtime, "triton-init") await init(runtime, "dynemo")
async def init(runtime: DistributedRuntime, ns: str): async def init(runtime: DistributedRuntime, ns: str):
......
...@@ -17,7 +17,7 @@ import asyncio ...@@ -17,7 +17,7 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install() uvloop.install()
...@@ -29,7 +29,7 @@ class RequestHandler: ...@@ -29,7 +29,7 @@ class RequestHandler:
yield char yield char
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
component = runtime.namespace("examples/pipeline").component("backend") component = runtime.namespace("examples/pipeline").component("backend")
await component.create_service() await component.create_service()
......
...@@ -17,7 +17,7 @@ import asyncio ...@@ -17,7 +17,7 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install() uvloop.install()
...@@ -32,7 +32,7 @@ class RequestHandler: ...@@ -32,7 +32,7 @@ class RequestHandler:
yield output.get("data") yield output.get("data")
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
# client to the next component - in this case the middle component # client to the next component - in this case the middle component
next = ( next = (
......
...@@ -17,7 +17,7 @@ import asyncio ...@@ -17,7 +17,7 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install() uvloop.install()
...@@ -32,7 +32,7 @@ class RequestHandler: ...@@ -32,7 +32,7 @@ class RequestHandler:
yield output.get("data") yield output.get("data")
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
# client to backend # client to backend
backend = ( backend = (
......
...@@ -17,12 +17,12 @@ import asyncio ...@@ -17,12 +17,12 @@ import asyncio
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
uvloop.install() uvloop.install()
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
""" """
# Pipeline Example # Pipeline Example
......
...@@ -17,18 +17,16 @@ import asyncio ...@@ -17,18 +17,16 @@ import asyncio
from protocol import Request from protocol import Request
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
""" """
Instantiate a `backend` client and call the `generate` endpoint Instantiate a `backend` client and call the `generate` endpoint
""" """
# get endpoint # get endpoint
endpoint = ( endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
runtime.namespace("triton-init").component("backend").endpoint("generate")
)
# create client # create client
client = await endpoint.client() client = await endpoint.client()
......
...@@ -19,11 +19,7 @@ import asyncio ...@@ -19,11 +19,7 @@ import asyncio
import uvloop import uvloop
from protocol import Request, Response from protocol import Request, Response
from triton_distributed.runtime import ( from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
DistributedRuntime,
triton_endpoint,
triton_worker,
)
uvloop.install() uvloop.install()
...@@ -33,19 +29,19 @@ class RequestHandler: ...@@ -33,19 +29,19 @@ class RequestHandler:
Request handler for the generate endpoint Request handler for the generate endpoint
""" """
@triton_endpoint(Request, Response) @dynemo_endpoint(Request, Response)
async def generate(self, request): async def generate(self, request):
for char in request.data: for char in request.data:
yield char yield char
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
""" """
Instantiate a `backend` component and serve the `generate` endpoint Instantiate a `backend` component and serve the `generate` endpoint
A `Component` can serve multiple endpoints A `Component` can serve multiple endpoints
""" """
component = runtime.namespace("triton-init").component("backend") component = runtime.namespace("dynemo").component("backend")
await component.create_service() await component.create_service()
endpoint = component.endpoint("generate") endpoint = component.endpoint("generate")
......
...@@ -15,8 +15,7 @@ ...@@ -15,8 +15,7 @@
use std::sync::Arc; use std::sync::Arc;
pub use serde::{Deserialize, Serialize}; pub use dynemo_runtime::{
pub use triton_distributed_runtime::{
error, error,
pipeline::{ pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream, async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
...@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{ ...@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{
protocols::annotated::Annotated, protocols::annotated::Annotated,
Error, Result, Error, Result,
}; };
pub use serde::{Deserialize, Serialize};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3_async_runtimes::TaskLocals; use pyo3_async_runtimes::TaskLocals;
...@@ -192,7 +192,6 @@ where ...@@ -192,7 +192,6 @@ where
// tell the python async generator to stop generating // tell the python async generator to stop generating
// right now, this is impossible as we are not passing the context to the python async generator // right now, this is impossible as we are not passing the context to the python async generator
// todo: add task-local context to the python async generator // todo: add task-local context to the python async generator
// see: https://github.com/triton-inference-server/triton_distributed/issues/130
ctx.stop_generating(); ctx.stop_generating();
let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e); let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e);
tracing::error!(request_id, "{}", msg); tracing::error!(request_id, "{}", msg);
......
...@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc}; ...@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc};
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tracing_subscriber::FmtSubscriber; use tracing_subscriber::FmtSubscriber;
use triton_distributed_runtime::{ use dynemo_runtime::{
self as rs, self as rs,
pipeline::{EngineStream, ManyOut, SingleIn}, pipeline::{EngineStream, ManyOut, SingleIn},
protocols::annotated::Annotated as RsAnnotated, protocols::annotated::Annotated as RsAnnotated,
traits::DistributedRuntimeProvider, traits::DistributedRuntimeProvider,
}; };
use triton_distributed_llm::{self as llm_rs}; use dynemo_llm::{self as llm_rs};
mod engine; mod engine;
mod llm; mod llm;
......
...@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard; ...@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard;
use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput}; use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput};
use llm_rs::types::Annotated; use llm_rs::types::Annotated;
use triton_distributed_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source}; use dynemo_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
use crate::engine::PythonAsyncEngine; use crate::engine::PythonAsyncEngine;
......
...@@ -27,9 +27,9 @@ use llm_rs::{ ...@@ -27,9 +27,9 @@ use llm_rs::{
}, },
}; };
use triton_distributed_runtime::pipeline::{Operator, ServiceFrontend, Source}; use dynemo_runtime::pipeline::{Operator, ServiceFrontend, Source};
use triton_distributed_runtime::pipeline::{ManyOut, SegmentSink, SingleIn}; use dynemo_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
#[pyclass] #[pyclass]
pub(crate) struct OAIChatPreprocessor { pub(crate) struct OAIChatPreprocessor {
......
...@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]] ...@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]]
class DistributedRuntime: class DistributedRuntime:
""" """
The runtime object for a distributed NOVA applications The runtime object for dynemo applications
""" """
... ...
......
...@@ -13,5 +13,5 @@ ...@@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from triton_distributed._core import KvMetricsPublisher as KvMetricsPublisher from dynemo._core import KvMetricsPublisher as KvMetricsPublisher
from triton_distributed._core import KvRouter as KvRouter from dynemo._core import KvRouter as KvRouter
...@@ -19,7 +19,7 @@ import string ...@@ -19,7 +19,7 @@ import string
import uvloop import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker from dynemo.runtime import DistributedRuntime, dynemo_worker
# Soak Test # Soak Test
# #
...@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker ...@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker
# could still eventually be a problem. # could still eventually be a problem.
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
ns = random_string() ns = random_string()
task = asyncio.create_task(server_init(runtime, ns)) task = asyncio.create_task(server_init(runtime, ns))
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment