Commit 602352ce authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

chore: rename dynamo (#44)


Co-authored-by: default avatarBiswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import random
import string
import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_worker
# Soak Test
#
# This was a failure case for the distributed runtime. If the Rust Tokio
# runtime is started with a small number of threads, the GIL + asyncio
# event loop can starve and time out the ingress handler.
#
# There may still be some blocking operations in the ingress handler that
# could still eventually be a problem.
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
    """Launch the server in the background, drive load against it, then shut down."""
    namespace = random_string()
    server_task = asyncio.create_task(server_init(runtime, namespace))
    await client_init(runtime, namespace)
    runtime.shutdown()
    await server_task
async def client_init(runtime: DistributedRuntime, ns: str):
    """
    Instantiate a `backend` client and flood the `generate` endpoint.

    Args:
        runtime: distributed runtime used to resolve the endpoint.
        ns: namespace under which the `backend` component is registered.

    Raises:
        AssertionError: if any of the concurrent requests failed.
    """
    # get endpoint
    endpoint = runtime.namespace(ns).component("backend").endpoint("generate")
    # create client
    client = await endpoint.client()
    # wait for an endpoint to be ready
    await client.wait_for_endpoints()

    # Issue many concurrent requests to put load on the server;
    # each task issues one request and drains the response stream.
    tasks = [asyncio.create_task(do_one(client)) for _ in range(20000)]

    # BUG FIX: without return_exceptions=True, gather() re-raises the first
    # task failure, so the error-counting code below was unreachable on
    # failure. Collecting exceptions as results lets us count every error.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    error_count = sum(1 for r in results if isinstance(r, BaseException))
    assert error_count == 0, f"expected 0 errors, got {error_count}"
async def do_one(client):
    """Issue a single `generate` request and drain the full response stream."""
    response = await client.generate("hello world")
    async for _ in response:
        pass
async def server_init(runtime: DistributedRuntime, ns: str):
    """
    Register a `backend` component and serve its `generate` endpoint.

    A `Component` can serve multiple endpoints; this soak test needs one.
    """
    backend = runtime.namespace(ns).component("backend")
    await backend.create_service()
    generate_endpoint = backend.endpoint("generate")
    print("Started server instance")
    await generate_endpoint.serve_endpoint(RequestHandler().generate)
class RequestHandler:
    """
    Handler for the `generate` endpoint: echoes the request back one
    character at a time, with an artificial delay per character.
    """

    async def generate(self, request):
        # The per-character delay keeps many requests in flight at once,
        # which is the load pattern this soak test exercises.
        delay = 0.1
        for token in request:
            await asyncio.sleep(delay)
            yield token
def random_string(length=10):
    """Return a random alphanumeric string of the given length."""
    alphabet = string.ascii_letters + string.digits  # a-z, A-Z, 0-9
    return "".join(random.choices(alphabet, k=length))
if __name__ == "__main__":
    # Install uvloop as the asyncio event-loop policy before starting the
    # worker; the soak test depends on a responsive event loop.
    uvloop.install()
    asyncio.run(worker())
......@@ -20,7 +20,7 @@ pytestmark = pytest.mark.pre_merge
def test_bindings_install():
# Verify python bindings to rust can be imported
import dynemo.runtime as tdr
import dynamo.runtime as tdr
# Placeholder to avoid unused import errors or removal by linters
assert tdr
......@@ -15,7 +15,10 @@
import asyncio
from dynemo._core import DistributedRuntime
from dynamo._core import DistributedRuntime
# Todo add support for launching etcd
# pytestmark = pytest.mark.pre_merge
async def test_simple_put_get():
......
......@@ -24,8 +24,8 @@ from typing import List
import pytest
from dynemo.llm import KvIndexer, KvMetricsAggregator, KvMetricsPublisher
from dynemo.runtime import DistributedRuntime
from dynamo.llm import KvIndexer, KvMetricsAggregator, KvMetricsPublisher
from dynamo.runtime import DistributedRuntime
pytestmark = pytest.mark.pre_merge
......@@ -89,7 +89,7 @@ async def test_event_handler():
# KV events
class DynemoResult:
class DynamoResult:
OK = 0
ERR = 1
......@@ -101,13 +101,13 @@ class EventPublisher:
# load event publisher library
self.lib = ctypes.CDLL(os.environ["VLLM_KV_CAPI_PATH"])
self.lib.dynemo_llm_init.argtypes = [c_char_p, c_char_p, c_int64]
self.lib.dynemo_llm_init.restype = c_uint32
result = self.lib.dynemo_llm_init(
self.lib.dynamo_llm_init.argtypes = [c_char_p, c_char_p, c_int64]
self.lib.dynamo_llm_init.restype = c_uint32
result = self.lib.dynamo_llm_init(
namespace.encode(), component.encode(), worker_id
)
assert result == DynemoResult.OK
self.lib.dynemo_kv_event_publish_stored.argtypes = [
assert result == DynamoResult.OK
self.lib.dynamo_kv_event_publish_stored.argtypes = [
ctypes.c_uint64, # event_id
ctypes.POINTER(ctypes.c_uint32), # token_ids
ctypes.POINTER(ctypes.c_size_t), # num_block_tokens
......@@ -116,18 +116,18 @@ class EventPublisher:
ctypes.POINTER(ctypes.c_uint64), # parent_hash
ctypes.c_uint64, # lora_id
]
self.lib.dynemo_kv_event_publish_stored.restype = (
self.lib.dynamo_kv_event_publish_stored.restype = (
ctypes.c_uint32
) # dynemo_llm_result_t
) # dynamo_llm_result_t
self.lib.dynemo_kv_event_publish_removed.argtypes = [
self.lib.dynamo_kv_event_publish_removed.argtypes = [
ctypes.c_uint64, # event_id
ctypes.POINTER(ctypes.c_uint64), # block_ids
ctypes.c_size_t, # num_blocks
]
self.lib.dynemo_kv_event_publish_removed.restype = (
self.lib.dynamo_kv_event_publish_removed.restype = (
ctypes.c_uint32
) # dynemo_llm_result_t
) # dynamo_llm_result_t
def store_event(self, tokens, lora_id):
parent_hash = (
......@@ -135,7 +135,7 @@ class EventPublisher:
if self.event_id_counter > 0
else None
)
result = self.lib.dynemo_kv_event_publish_stored(
result = self.lib.dynamo_kv_event_publish_stored(
self.event_id_counter, # uint64_t event_id
(ctypes.c_uint32 * len(tokens))(*tokens), # const uint32_t *token_ids
(ctypes.c_size_t * 1)(len(tokens)), # const uintptr_t *num_block_tokens
......@@ -147,17 +147,17 @@ class EventPublisher:
self.block_ids.append(self.event_id_counter)
self.event_id_counter += 1
assert result == DynemoResult.OK
assert result == DynamoResult.OK
def remove_event(self):
result = self.lib.dynemo_kv_event_publish_removed(
result = self.lib.dynamo_kv_event_publish_removed(
self.event_id_counter, # uint64_t event_id
(ctypes.c_uint64 * 1)(self.block_ids[-1]), # const uint64_t *block_ids
1, # uintptr_t num_blocks
)
self.event_id_counter += 1
assert result == DynemoResult.OK
assert result == DynamoResult.OK
async def test_metrics_aggregator():
......
......@@ -1409,7 +1409,7 @@ dependencies = [
]
[[package]]
name = "dynemo-llm"
name = "dynamo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
......@@ -1425,7 +1425,7 @@ dependencies = [
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"dynamo-runtime",
"either",
"erased-serde",
"futures",
......@@ -1470,7 +1470,7 @@ dependencies = [
]
[[package]]
name = "dynemo-runtime"
name = "dynamo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
......
......@@ -22,7 +22,7 @@ homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
[package]
name = "dynemo-llm"
name = "dynamo-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
......@@ -44,7 +44,7 @@ vulkan = ["llama-cpp-2/vulkan"]
[workspace.dependencies]
# local or crates.io
dynemo-runtime = { version = "0.2.0", path = "../runtime" }
dynamo-runtime = { version = "0.2.0", path = "../runtime" }
# crates.io
anyhow = { version = "1" }
......@@ -67,7 +67,7 @@ strum = { version = "0.27", features = ["derive"] }
[dependencies]
# repo
dynemo-runtime = { workspace = true }
dynamo-runtime = { workspace = true }
# workspace
anyhow = { workspace = true }
......
......@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
use tracing as log;
use crate::model_card::model::{ModelDeploymentCard, TokenizerKind};
use dynemo_runtime::{
use dynamo_runtime::{
pipeline::{
async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream,
ServerStreamingEngine, SingleIn,
......
......@@ -22,11 +22,11 @@ use std::{
use anyhow::Context;
use async_stream::stream;
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::CancellationToken;
use llama_cpp_2::{
context::{params::LlamaContextParams, LlamaContext},
llama_backend::LlamaBackend,
......
......@@ -28,10 +28,10 @@ use mistralrs::{
};
use tokio::sync::mpsc::channel;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use crate::protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
......
......@@ -16,8 +16,8 @@
use std::ffi::CStr;
use std::{path::Path, sync::Arc};
use dynemo_runtime::pipeline::error as pipeline_error;
pub use dynemo_runtime::{
use dynamo_runtime::pipeline::error as pipeline_error;
pub use dynamo_runtime::{
error,
pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
......
......@@ -17,8 +17,8 @@ use std::path::Path;
use std::sync::Arc;
use crate::backend::ExecutionContext;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::CancellationToken;
mod worker;
......
......@@ -19,10 +19,10 @@ use async_stream::stream;
use async_trait::async_trait;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
use crate::engines::MultiNodeConfig;
......
......@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use tokio::{io::AsyncReadExt as _, task::JoinHandle};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
use crate::engines::sglang::MultiGPUConfig;
use crate::engines::MultiNodeConfig;
......
......@@ -16,7 +16,7 @@
use std::sync::Arc;
use crate::backend::ExecutionContext;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::error as pipeline_error;
pub mod executor;
......
......@@ -15,9 +15,9 @@
use anyhow::{Error, Result};
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use futures::stream;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;
......
......@@ -19,8 +19,8 @@ use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::CancellationToken;
use crate::backend::ExecutionContext;
use crate::engines::MultiNodeConfig;
......
......@@ -21,10 +21,10 @@ use async_trait::async_trait;
use crate::engines::vllm::worker;
use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
pub struct VllmEngine {
cancel_token: CancellationToken,
......
......@@ -24,7 +24,7 @@ use tokio::select;
use tokio::time;
use tracing;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::CancellationToken;
/// Default is 16 seconds, we make it a bit shorter
const RAY_STOP_TIMEOUT_SECS: u32 = 10;
......
......@@ -19,8 +19,8 @@ use std::{
};
use async_zmq::{SinkExt, StreamExt};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::CancellationToken;
use pyo3::{
prelude::*,
types::{IntoPyDict, PyBytes, PyString},
......
......@@ -18,7 +18,7 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::Receiver;
use dynemo_runtime::{
use dynamo_runtime::{
protocols::{self, annotated::Annotated},
raise,
transports::etcd::{KeyValue, WatchEvent},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment