Commit 602352ce authored by Neelay Shah, committed by GitHub
Browse files

chore: rename dynamo (#44)


Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
use std::sync::Arc; use std::sync::Arc;
use dynemo_llm::{ use dynamo_llm::{
backend::Backend, backend::Backend,
http::service::{discovery, service_v2}, http::service::{discovery, service_v2},
model_type::ModelType, model_type::ModelType,
...@@ -27,7 +27,7 @@ use dynemo_llm::{ ...@@ -27,7 +27,7 @@ use dynemo_llm::{
Annotated, Annotated,
}, },
}; };
use dynemo_runtime::{ use dynamo_runtime::{
pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source}, pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
DistributedRuntime, Runtime, DistributedRuntime, Runtime,
}; };
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use dynemo_llm::{ use dynamo_llm::{
backend::Backend, backend::Backend,
preprocessor::OpenAIPreprocessor, preprocessor::OpenAIPreprocessor,
types::{ types::{
...@@ -24,7 +24,7 @@ use dynemo_llm::{ ...@@ -24,7 +24,7 @@ use dynemo_llm::{
Annotated, Annotated,
}, },
}; };
use dynemo_runtime::{ use dynamo_runtime::{
pipeline::{Context, ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source}, pipeline::{Context, ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
runtime::CancellationToken, runtime::CancellationToken,
DistributedRuntime, Runtime, DistributedRuntime, Runtime,
......
...@@ -16,11 +16,11 @@ ...@@ -16,11 +16,11 @@
#[cfg(any(feature = "vllm", feature = "sglang"))] #[cfg(any(feature = "vllm", feature = "sglang"))]
use std::{future::Future, pin::Pin}; use std::{future::Future, pin::Pin};
use dynemo_llm::{ use dynamo_llm::{
backend::ExecutionContext, model_card::model::ModelDeploymentCard, backend::ExecutionContext, model_card::model::ModelDeploymentCard,
types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine, types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine,
}; };
use dynemo_runtime::protocols::Endpoint; use dynamo_runtime::protocols::Endpoint;
mod flags; mod flags;
pub use flags::Flags; pub use flags::Flags;
...@@ -67,7 +67,7 @@ pub enum EngineConfig { ...@@ -67,7 +67,7 @@ pub enum EngineConfig {
#[allow(unused_mut)] #[allow(unused_mut)]
pub async fn run( pub async fn run(
runtime: dynemo_runtime::Runtime, runtime: dynamo_runtime::Runtime,
mut in_opt: Input, // mut because vllm and sglang multi-node can change it mut in_opt: Input, // mut because vllm and sglang multi-node can change it
out_opt: Output, out_opt: Output,
flags: Flags, flags: Flags,
...@@ -152,12 +152,12 @@ pub async fn run( ...@@ -152,12 +152,12 @@ pub async fn run(
}; };
EngineConfig::StaticFull { EngineConfig::StaticFull {
service_name: model_name, service_name: model_name,
engine: dynemo_llm::engines::mistralrs::make_engine(&model_path).await?, engine: dynamo_llm::engines::mistralrs::make_engine(&model_path).await?,
} }
} }
#[cfg(feature = "sglang")] #[cfg(feature = "sglang")]
Output::SgLang => { Output::SgLang => {
use dynemo_llm::engines::sglang; use dynamo_llm::engines::sglang;
let Some(model_path) = model_path else { let Some(model_path) = model_path else {
anyhow::bail!("out=sglang requires flag --model-path=<full-path-to-model-dir>"); anyhow::bail!("out=sglang requires flag --model-path=<full-path-to-model-dir>");
}; };
...@@ -169,7 +169,7 @@ pub async fn run( ...@@ -169,7 +169,7 @@ pub async fn run(
let Some(sock_prefix) = zmq_socket_prefix else { let Some(sock_prefix) = zmq_socket_prefix else {
anyhow::bail!("sglang requires zmq_socket_prefix"); anyhow::bail!("sglang requires zmq_socket_prefix");
}; };
let node_conf = dynemo_llm::engines::MultiNodeConfig { let node_conf = dynamo_llm::engines::MultiNodeConfig {
num_nodes: flags.num_nodes, num_nodes: flags.num_nodes,
node_rank: flags.node_rank, node_rank: flags.node_rank,
leader_addr: flags.leader_addr.unwrap_or_default(), leader_addr: flags.leader_addr.unwrap_or_default(),
...@@ -207,7 +207,7 @@ pub async fn run( ...@@ -207,7 +207,7 @@ pub async fn run(
} }
#[cfg(feature = "vllm")] #[cfg(feature = "vllm")]
Output::Vllm => { Output::Vllm => {
use dynemo_llm::engines::vllm; use dynamo_llm::engines::vllm;
if flags.base_gpu_id != 0 { if flags.base_gpu_id != 0 {
anyhow::bail!("vllm does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead."); anyhow::bail!("vllm does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead.");
} }
...@@ -231,7 +231,7 @@ pub async fn run( ...@@ -231,7 +231,7 @@ pub async fn run(
let Some(sock_prefix) = zmq_socket_prefix else { let Some(sock_prefix) = zmq_socket_prefix else {
anyhow::bail!("vllm requires zmq_socket_prefix"); anyhow::bail!("vllm requires zmq_socket_prefix");
}; };
let node_conf = dynemo_llm::engines::MultiNodeConfig { let node_conf = dynamo_llm::engines::MultiNodeConfig {
num_nodes: flags.num_nodes, num_nodes: flags.num_nodes,
node_rank: flags.node_rank, node_rank: flags.node_rank,
leader_addr: flags.leader_addr.unwrap_or_default(), leader_addr: flags.leader_addr.unwrap_or_default(),
...@@ -274,7 +274,7 @@ pub async fn run( ...@@ -274,7 +274,7 @@ pub async fn run(
} }
#[cfg(feature = "llamacpp")] #[cfg(feature = "llamacpp")]
Output::LlamaCpp => { Output::LlamaCpp => {
use dynemo_llm::engines::llamacpp; use dynamo_llm::engines::llamacpp;
let Some(model_path) = model_path else { let Some(model_path) = model_path else {
anyhow::bail!("out=llamacpp requires flag --model-path=<full-path-to-model-gguf>"); anyhow::bail!("out=llamacpp requires flag --model-path=<full-path-to-model-gguf>");
}; };
...@@ -295,7 +295,7 @@ pub async fn run( ...@@ -295,7 +295,7 @@ pub async fn run(
} }
#[cfg(feature = "trtllm")] #[cfg(feature = "trtllm")]
Output::TrtLLM => { Output::TrtLLM => {
use dynemo_llm::engines::trtllm; use dynamo_llm::engines::trtllm;
let Some(model_path) = model_path else { let Some(model_path) = model_path else {
anyhow::bail!("out=trtllm requires flag --model-path=<full-path-to-model-dir>"); anyhow::bail!("out=trtllm requires flag --model-path=<full-path-to-model-dir>");
}; };
...@@ -315,7 +315,7 @@ pub async fn run( ...@@ -315,7 +315,7 @@ pub async fn run(
} }
#[cfg(feature = "python")] #[cfg(feature = "python")]
Output::PythonStr(path_str) => { Output::PythonStr(path_str) => {
use dynemo_llm::engines::python; use dynamo_llm::engines::python;
let Some(model_name) = model_name else { let Some(model_name) = model_name else {
anyhow::bail!("Provide model service name as `--model-name <this>`"); anyhow::bail!("Provide model service name as `--model-name <this>`");
}; };
...@@ -328,7 +328,7 @@ pub async fn run( ...@@ -328,7 +328,7 @@ pub async fn run(
} }
#[cfg(feature = "python")] #[cfg(feature = "python")]
Output::PythonTok(path_str) => { Output::PythonTok(path_str) => {
use dynemo_llm::engines::python; use dynamo_llm::engines::python;
let Some(card) = maybe_card.clone() else { let Some(card) = maybe_card.clone() else {
anyhow::bail!("Could not find tokenizer. Pass flag --model-path <path>"); anyhow::bail!("Could not find tokenizer. Pass flag --model-path <path>");
}; };
......
...@@ -17,17 +17,17 @@ use std::env; ...@@ -17,17 +17,17 @@ use std::env;
use clap::Parser; use clap::Parser;
use dynemo_run::{Input, Output}; use dynamo_run::{Input, Output};
use dynemo_runtime::logging; use dynamo_runtime::logging;
const HELP: &str = r#" const HELP: &str = r#"
dynemo-run is a single binary that wires together the various inputs (http, text, network) and workers (network, engine), that runs the services. It is the simplest way to use dynemo locally. dynamo-run is a single binary that wires together the various inputs (http, text, network) and workers (network, engine), that runs the services. It is the simplest way to use dynamo locally.
Example: Example:
- cargo build --release --features mistralrs,cuda - cargo build --release --features mistralrs,cuda
- cd target/release - cd target/release
- ./dynemo-run hf_checkouts/Llama-3.2-3B-Instruct/ - ./dynamo-run hf_checkouts/Llama-3.2-3B-Instruct/
- OR: ./dynemo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf - OR: ./dynamo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf
"#; "#;
...@@ -41,14 +41,14 @@ const DEFAULT_OUT: Output = Output::EchoFull; ...@@ -41,14 +41,14 @@ const DEFAULT_OUT: Output = Output::EchoFull;
const ZMQ_SOCKET_PREFIX: &str = "dyn"; const ZMQ_SOCKET_PREFIX: &str = "dyn";
const USAGE: &str = "USAGE: dynemo-run in=[http|text|dyn://<path>|none] out=[mistralrs|sglang|llamacpp|vllm|trtllm|echo_full|echo_core|pystr:<engine.py>|pytok:<engine.py>] [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0]"; const USAGE: &str = "USAGE: dynamo-run in=[http|text|dyn://<path>|none] out=[mistralrs|sglang|llamacpp|vllm|trtllm|echo_full|echo_core|pystr:<engine.py>|pytok:<engine.py>] [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0]";
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
logging::init(); logging::init();
// Call sub-processes before starting the Runtime machinery // Call sub-processes before starting the Runtime machinery
// For anything except sub-process starting try_parse_from will error. // For anything except sub-process starting try_parse_from will error.
if let Ok(flags) = dynemo_run::Flags::try_parse_from(env::args()) { if let Ok(flags) = dynamo_run::Flags::try_parse_from(env::args()) {
#[allow(unused_variables)] #[allow(unused_variables)]
if let Some(sglang_flags) = flags.internal_sglang_process { if let Some(sglang_flags) = flags.internal_sglang_process {
let Some(model_path) = flags.model_path_flag.as_ref() else { let Some(model_path) = flags.model_path_flag.as_ref() else {
...@@ -60,13 +60,13 @@ fn main() -> anyhow::Result<()> { ...@@ -60,13 +60,13 @@ fn main() -> anyhow::Result<()> {
if cfg!(feature = "sglang") { if cfg!(feature = "sglang") {
#[cfg(feature = "sglang")] #[cfg(feature = "sglang")]
{ {
use dynemo_llm::engines::sglang; use dynamo_llm::engines::sglang;
let gpu_config = sglang::MultiGPUConfig { let gpu_config = sglang::MultiGPUConfig {
tp_size: flags.tensor_parallel_size, tp_size: flags.tensor_parallel_size,
tp_rank: sglang_flags.tp_rank, tp_rank: sglang_flags.tp_rank,
gpu_id: sglang_flags.gpu_id, gpu_id: sglang_flags.gpu_id,
}; };
let node_config = dynemo_llm::engines::MultiNodeConfig { let node_config = dynamo_llm::engines::MultiNodeConfig {
num_nodes: flags.num_nodes, num_nodes: flags.num_nodes,
node_rank: flags.node_rank, node_rank: flags.node_rank,
leader_addr: flags.leader_addr.unwrap_or_default(), leader_addr: flags.leader_addr.unwrap_or_default(),
...@@ -98,8 +98,8 @@ fn main() -> anyhow::Result<()> { ...@@ -98,8 +98,8 @@ fn main() -> anyhow::Result<()> {
if cfg!(feature = "vllm") { if cfg!(feature = "vllm") {
#[cfg(feature = "vllm")] #[cfg(feature = "vllm")]
{ {
use dynemo_llm::engines::vllm; use dynamo_llm::engines::vllm;
let node_config = dynemo_llm::engines::MultiNodeConfig { let node_config = dynamo_llm::engines::MultiNodeConfig {
num_nodes: flags.num_nodes, num_nodes: flags.num_nodes,
node_rank: flags.node_rank, node_rank: flags.node_rank,
leader_addr: flags.leader_addr.unwrap_or_default(), leader_addr: flags.leader_addr.unwrap_or_default(),
...@@ -119,15 +119,15 @@ fn main() -> anyhow::Result<()> { ...@@ -119,15 +119,15 @@ fn main() -> anyhow::Result<()> {
} }
// max_worker_threads and max_blocking_threads from env vars or config file. // max_worker_threads and max_blocking_threads from env vars or config file.
let rt_config = dynemo_runtime::RuntimeConfig::from_settings()?; let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
// One per process. Wraps a Runtime which holds two tokio runtimes. // One per process. Wraps a Runtime which holds two tokio runtimes.
let worker = dynemo_runtime::Worker::from_config(rt_config)?; let worker = dynamo_runtime::Worker::from_config(rt_config)?;
worker.execute(wrapper) worker.execute(wrapper)
} }
async fn wrapper(runtime: dynemo_runtime::Runtime) -> anyhow::Result<()> { async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
let mut in_opt = None; let mut in_opt = None;
let mut out_opt = None; let mut out_opt = None;
let args: Vec<String> = env::args().skip(1).collect(); let args: Vec<String> = env::args().skip(1).collect();
...@@ -171,13 +171,13 @@ async fn wrapper(runtime: dynemo_runtime::Runtime) -> anyhow::Result<()> { ...@@ -171,13 +171,13 @@ async fn wrapper(runtime: dynemo_runtime::Runtime) -> anyhow::Result<()> {
// Clap skips the first argument expecting it to be the binary name, so add it back // Clap skips the first argument expecting it to be the binary name, so add it back
// Note `--model-path` has index=1 (in lib.rs) so that doesn't need a flag. // Note `--model-path` has index=1 (in lib.rs) so that doesn't need a flag.
let flags = dynemo_run::Flags::try_parse_from( let flags = dynamo_run::Flags::try_parse_from(
["dynemo-run".to_string()] ["dynamo-run".to_string()]
.into_iter() .into_iter()
.chain(env::args().skip(non_flag_params)), .chain(env::args().skip(non_flag_params)),
)?; )?;
dynemo_run::run( dynamo_run::run(
runtime, runtime,
in_opt, in_opt,
out_opt, out_opt,
......
...@@ -18,12 +18,12 @@ use std::{sync::Arc, time::Duration}; ...@@ -18,12 +18,12 @@ use std::{sync::Arc, time::Duration};
use async_stream::stream; use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use dynemo_llm::backend::ExecutionContext; use dynamo_llm::backend::ExecutionContext;
use dynemo_llm::preprocessor::BackendInput; use dynamo_llm::preprocessor::BackendInput;
use dynemo_llm::protocols::common::llm_backend::LLMEngineOutput; use dynamo_llm::protocols::common::llm_backend::LLMEngineOutput;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn}; use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated; use dynamo_runtime::protocols::annotated::Annotated;
/// How long to sleep between echoed tokens. /// How long to sleep between echoed tokens.
/// 50ms gives us 20 tok/s. /// 50ms gives us 20 tok/s.
......
...@@ -18,13 +18,13 @@ use std::{sync::Arc, time::Duration}; ...@@ -18,13 +18,13 @@ use std::{sync::Arc, time::Duration};
use async_stream::stream; use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use dynemo_llm::protocols::openai::chat_completions::{ use dynamo_llm::protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse, NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
}; };
use dynemo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine; use dynamo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn}; use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated; use dynamo_runtime::protocols::annotated::Annotated;
/// How long to sleep between echoed tokens. /// How long to sleep between echoed tokens.
/// 50ms gives us 20 tok/s. /// 50ms gives us 20 tok/s.
......
...@@ -955,7 +955,7 @@ dependencies = [ ...@@ -955,7 +955,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "dynemo-llm" name = "dynamo-llm"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
...@@ -970,7 +970,7 @@ dependencies = [ ...@@ -970,7 +970,7 @@ dependencies = [
"chrono", "chrono",
"cmake", "cmake",
"derive_builder", "derive_builder",
"dynemo-runtime", "dynamo-runtime",
"either", "either",
"erased-serde", "erased-serde",
"futures", "futures",
...@@ -1004,7 +1004,7 @@ dependencies = [ ...@@ -1004,7 +1004,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "dynemo-runtime" name = "dynamo-runtime"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
...@@ -1947,14 +1947,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -1947,14 +1947,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]] [[package]]
name = "libdynemo-llm" name = "libdynamo_llm"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-once-cell", "async-once-cell",
"cbindgen", "cbindgen",
"dynemo-llm", "dynamo-llm",
"dynemo-runtime", "dynamo-runtime",
"futures", "futures",
"libc", "libc",
"once_cell", "once_cell",
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
[package] [package]
name = "libdynemo-llm" name = "libdynamo_llm"
version = "0.2.1" version = "0.2.1"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
...@@ -23,15 +23,15 @@ homepage = "https://github.com/dynemo-ai/dynemo" ...@@ -23,15 +23,15 @@ homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git" repository = "https://github.com/dynemo-ai/dynemo.git"
[lib] [lib]
name = "dynemo_llm_capi" name = "dynamo_llm_capi"
crate-type = ["cdylib"] crate-type = ["cdylib"]
[build-dependencies] [build-dependencies]
cbindgen = "0.27" cbindgen = "0.27"
[dependencies] [dependencies]
dynemo-llm = { path = "../../llm" } dynamo-llm = { path = "../../llm" }
dynemo-runtime = { path = "../../runtime" } dynamo-runtime = { path = "../../runtime" }
anyhow = { version = "1" } anyhow = { version = "1" }
futures = "0.3" futures = "0.3"
......
...@@ -22,7 +22,7 @@ fn main() { ...@@ -22,7 +22,7 @@ fn main() {
let header_path = Path::new(&crate_dir) let header_path = Path::new(&crate_dir)
.join("include") .join("include")
.join("nvidia") .join("nvidia")
.join("dynemo_llm") .join("dynamo_llm")
.join("llm_engine.h"); .join("llm_engine.h");
cbindgen::generate(crate_dir) cbindgen::generate(crate_dir)
......
...@@ -25,7 +25,7 @@ enum_class = false ...@@ -25,7 +25,7 @@ enum_class = false
[export] [export]
include = ["DynemoLlmResult", "dynemo_llm_init", "dynemo_llm_shutdown"] include = ["DynamoLlmResult", "dynamo_llm_init", "dynamo_llm_shutdown"]
[export.rename] [export.rename]
"DynemoLlmResult" = "dynemo_llm_result_t" "DynamoLlmResult" = "dynamo_llm_result_t"
...@@ -19,10 +19,10 @@ use once_cell::sync::OnceCell; ...@@ -19,10 +19,10 @@ use once_cell::sync::OnceCell;
use std::ffi::CStr; use std::ffi::CStr;
use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::atomic::{AtomicU32, Ordering};
use dynemo_llm::kv_router::{ use dynamo_llm::kv_router::{
indexer::compute_block_hash_for_seq, protocols::*, publisher::KvEventPublisher, indexer::compute_block_hash_for_seq, protocols::*, publisher::KvEventPublisher,
}; };
use dynemo_runtime::{DistributedRuntime, Worker}; use dynamo_runtime::{DistributedRuntime, Worker};
static WK: OnceCell<Worker> = OnceCell::new(); static WK: OnceCell<Worker> = OnceCell::new();
static DRT: AsyncOnceCell<DistributedRuntime> = AsyncOnceCell::new(); static DRT: AsyncOnceCell<DistributedRuntime> = AsyncOnceCell::new();
// [FIXME] shouldn't the publisher be instance passing between API calls? // [FIXME] shouldn't the publisher be instance passing between API calls?
...@@ -41,7 +41,7 @@ fn initialize_tracing() { ...@@ -41,7 +41,7 @@ fn initialize_tracing() {
} }
#[repr(u32)] #[repr(u32)]
pub enum DynemoLlmResult { pub enum DynamoLlmResult {
OK = 0, OK = 0,
ERR = 1, ERR = 1,
} }
...@@ -49,17 +49,17 @@ pub enum DynemoLlmResult { ...@@ -49,17 +49,17 @@ pub enum DynemoLlmResult {
/// # Safety /// # Safety
/// the namespace_c_str and component_c_str are passed as pointers to C strings /// the namespace_c_str and component_c_str are passed as pointers to C strings
#[no_mangle] #[no_mangle]
pub unsafe extern "C" fn dynemo_llm_init( pub unsafe extern "C" fn dynamo_llm_init(
namespace_c_str: *const c_char, namespace_c_str: *const c_char,
component_c_str: *const c_char, component_c_str: *const c_char,
worker_id: i64, worker_id: i64,
) -> DynemoLlmResult { ) -> DynamoLlmResult {
initialize_tracing(); initialize_tracing();
let wk = match WK.get_or_try_init(Worker::from_settings) { let wk = match WK.get_or_try_init(Worker::from_settings) {
Ok(wk) => wk.clone(), Ok(wk) => wk.clone(),
Err(e) => { Err(e) => {
eprintln!("Failed to initialize runtime: {:?}", e); eprintln!("Failed to initialize runtime: {:?}", e);
return DynemoLlmResult::ERR; return DynamoLlmResult::ERR;
} }
}; };
let rt = wk.runtime(); let rt = wk.runtime();
...@@ -73,7 +73,7 @@ pub unsafe extern "C" fn dynemo_llm_init( ...@@ -73,7 +73,7 @@ pub unsafe extern "C" fn dynemo_llm_init(
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(e) => { Err(e) => {
eprintln!("Failed to initialize distributed runtime: {:?}", e); eprintln!("Failed to initialize distributed runtime: {:?}", e);
Err(DynemoLlmResult::ERR) Err(DynamoLlmResult::ERR)
} }
} }
}); });
...@@ -81,7 +81,7 @@ pub unsafe extern "C" fn dynemo_llm_init( ...@@ -81,7 +81,7 @@ pub unsafe extern "C" fn dynemo_llm_init(
Ok(s) => s.to_string(), Ok(s) => s.to_string(),
Err(e) => { Err(e) => {
eprintln!("Failed to convert C string to Rust string: {:?}", e); eprintln!("Failed to convert C string to Rust string: {:?}", e);
return DynemoLlmResult::ERR; return DynamoLlmResult::ERR;
} }
}; };
...@@ -89,18 +89,18 @@ pub unsafe extern "C" fn dynemo_llm_init( ...@@ -89,18 +89,18 @@ pub unsafe extern "C" fn dynemo_llm_init(
Ok(s) => s.to_string(), Ok(s) => s.to_string(),
Err(e) => { Err(e) => {
eprintln!("Failed to convert C string to Rust string: {:?}", e); eprintln!("Failed to convert C string to Rust string: {:?}", e);
return DynemoLlmResult::ERR; return DynamoLlmResult::ERR;
} }
}; };
match result { match result {
Ok(_) => match KV_PUB Ok(_) => match KV_PUB
.get_or_try_init(move || dynemo_create_kv_publisher(namespace, component, worker_id)) .get_or_try_init(move || dynamo_create_kv_publisher(namespace, component, worker_id))
{ {
Ok(_) => DynemoLlmResult::OK, Ok(_) => DynamoLlmResult::OK,
Err(e) => { Err(e) => {
eprintln!("Failed to initialize distributed runtime: {:?}", e); eprintln!("Failed to initialize distributed runtime: {:?}", e);
DynemoLlmResult::ERR DynamoLlmResult::ERR
} }
}, },
Err(e) => e, Err(e) => e,
...@@ -108,33 +108,33 @@ pub unsafe extern "C" fn dynemo_llm_init( ...@@ -108,33 +108,33 @@ pub unsafe extern "C" fn dynemo_llm_init(
} }
#[no_mangle] #[no_mangle]
pub extern "C" fn dynemo_llm_shutdown() -> DynemoLlmResult { pub extern "C" fn dynamo_llm_shutdown() -> DynamoLlmResult {
let wk = match WK.get() { let wk = match WK.get() {
Some(wk) => wk, Some(wk) => wk,
None => { None => {
eprintln!("Runtime not initialized"); eprintln!("Runtime not initialized");
return DynemoLlmResult::ERR; return DynamoLlmResult::ERR;
} }
}; };
wk.runtime().shutdown(); wk.runtime().shutdown();
DynemoLlmResult::OK DynamoLlmResult::OK
} }
#[no_mangle] #[no_mangle]
pub extern "C" fn dynemo_llm_load_publisher_create() -> DynemoLlmResult { pub extern "C" fn dynamo_llm_load_publisher_create() -> DynamoLlmResult {
DynemoLlmResult::OK DynamoLlmResult::OK
} }
// instantiate a kv publisher // instantiate a kv publisher
// this will bring up the task to publish and the channels to await publishing events // this will bring up the task to publish and the channels to await publishing events
// the [`dynemo_kv_publish_store_event`] call will use a handle to the publisher to send events // the [`dynamo_kv_publish_store_event`] call will use a handle to the publisher to send events
// store and the [`dynemo_kv_event_create_removed`] will create remove events // store and the [`dynamo_kv_event_create_removed`] will create remove events
// these calls must be driven by external c++ threads that are consuming the kv events from the // these calls must be driven by external c++ threads that are consuming the kv events from the
// c++ executor api // c++ executor api
fn dynemo_create_kv_publisher( fn dynamo_create_kv_publisher(
namespace: String, namespace: String,
component: String, component: String,
worker_id: i64, worker_id: i64,
...@@ -238,7 +238,7 @@ fn kv_event_create_removed_from_parts( ...@@ -238,7 +238,7 @@ fn kv_event_create_removed_from_parts(
/// parent_hash is passed as pointer to indicate whether the blocks /// parent_hash is passed as pointer to indicate whether the blocks
/// has a parent hash or not. nullptr is used to represent no parent hash /// has a parent hash or not. nullptr is used to represent no parent hash
#[no_mangle] #[no_mangle]
pub unsafe extern "C" fn dynemo_kv_event_publish_stored( pub unsafe extern "C" fn dynamo_kv_event_publish_stored(
event_id: u64, event_id: u64,
token_ids: *const u32, token_ids: *const u32,
num_block_tokens: *const usize, num_block_tokens: *const usize,
...@@ -246,7 +246,7 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored( ...@@ -246,7 +246,7 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored(
num_blocks: usize, num_blocks: usize,
parent_hash: *const u64, parent_hash: *const u64,
lora_id: u64, lora_id: u64,
) -> DynemoLlmResult { ) -> DynamoLlmResult {
let publisher = KV_PUB.get().unwrap(); let publisher = KV_PUB.get().unwrap();
let parent_hash = { let parent_hash = {
if parent_hash.is_null() { if parent_hash.is_null() {
...@@ -265,40 +265,40 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored( ...@@ -265,40 +265,40 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored(
lora_id, lora_id,
); );
match publisher.publish(event) { match publisher.publish(event) {
Ok(_) => DynemoLlmResult::OK, Ok(_) => DynamoLlmResult::OK,
Err(e) => { Err(e) => {
eprintln!("Error publishing stored kv event {:?}", e); eprintln!("Error publishing stored kv event {:?}", e);
DynemoLlmResult::ERR DynamoLlmResult::ERR
} }
} }
} }
#[no_mangle] #[no_mangle]
pub extern "C" fn dynemo_kv_event_publish_removed( pub extern "C" fn dynamo_kv_event_publish_removed(
event_id: u64, event_id: u64,
block_ids: *const u64, block_ids: *const u64,
num_blocks: usize, num_blocks: usize,
) -> DynemoLlmResult { ) -> DynamoLlmResult {
let publisher = KV_PUB.get().unwrap(); let publisher = KV_PUB.get().unwrap();
let event = kv_event_create_removed_from_parts(event_id, block_ids, num_blocks); let event = kv_event_create_removed_from_parts(event_id, block_ids, num_blocks);
match publisher.publish(event) { match publisher.publish(event) {
Ok(_) => DynemoLlmResult::OK, Ok(_) => DynamoLlmResult::OK,
Err(e) => { Err(e) => {
eprintln!("Error publishing removed kv event {:?}", e); eprintln!("Error publishing removed kv event {:?}", e);
DynemoLlmResult::ERR DynamoLlmResult::ERR
} }
} }
} }
// #[no_mangle] // #[no_mangle]
// pub extern "C" fn dynemo_kv_publish_store_event( // pub extern "C" fn dynamo_kv_publish_store_event(
// event_id: u64, // event_id: u64,
// token_ids: *const u32, // token_ids: *const u32,
// num_tokens: usize, // num_tokens: usize,
// lora_id: u64, // lora_id: u64,
// ) -> DynemoLlmResult { // ) -> DynamoLlmResult {
// // if event.is_null() || token_ids.is_null() { // // if event.is_null() || token_ids.is_null() {
// // return dynemoKvErrorType::INVALID_TOKEN_IDS; // // return dynamoKvErrorType::INVALID_TOKEN_IDS;
// // } // // }
// // let tokens = unsafe { std::slice::from_raw_parts(token_ids, num_tokens) }.to_vec(); // // let tokens = unsafe { std::slice::from_raw_parts(token_ids, num_tokens) }.to_vec();
...@@ -311,15 +311,15 @@ pub extern "C" fn dynemo_kv_event_publish_removed( ...@@ -311,15 +311,15 @@ pub extern "C" fn dynemo_kv_event_publish_removed(
// // unsafe { *event = Box::into_raw(new_event) }; // // unsafe { *event = Box::into_raw(new_event) };
// DynemoLlmResult::OK // DynamoLlmResult::OK
// } // }
// #[no_mangle] // #[no_mangle]
// pub extern "C" fn dynemo_kv_event_create_removed( // pub extern "C" fn dynamo_kv_event_create_removed(
// event_id: u64, // event_id: u64,
// block_hashes: *const u64, // block_hashes: *const u64,
// num_hashes: usize, // num_hashes: usize,
// ) -> DynemoLlmResult { // ) -> DynamoLlmResult {
// // if event.is_null() || block_hashes.is_null() { // // if event.is_null() || block_hashes.is_null() {
// // return -1; // // return -1;
// // } // // }
...@@ -334,19 +334,19 @@ pub extern "C" fn dynemo_kv_event_publish_removed( ...@@ -334,19 +334,19 @@ pub extern "C" fn dynemo_kv_event_publish_removed(
// // unsafe { *event = Box::into_raw(new_event) }; // // unsafe { *event = Box::into_raw(new_event) };
// // 0 // // 0
// DynemoLlmResult::OK // DynamoLlmResult::OK
// } // }
// /// create load publisher object and return a handle // /// create load publisher object and return a handle
// /// load publisher will instantiate the nats service and tie its stats handler to // /// load publisher will instantiate the nats service and tie its stats handler to
// /// a watch channel receiver. the watch channel sender will be attached to the // /// a watch channel receiver. the watch channel sender will be attached to the
// /// handle and calls to [`dynemo_load_stats_publish`] issue the stats to the watch t // /// handle and calls to [`dynamo_load_stats_publish`] issue the stats to the watch t
// pub extern "C" fn dynemo_load_publisher_create() -> *mut LoadPublisher { // pub extern "C" fn dynamo_load_publisher_create() -> *mut LoadPublisher {
// // let publisher = Box::new(LoadPublisher::new()); // // let publisher = Box::new(LoadPublisher::new());
// // Box::into_raw(publisher) // // Box::into_raw(publisher)
// } // }
// pub extern "C" fn dynemo_load_stats_publish( // pub extern "C" fn dynamo_load_stats_publish(
// publisher: *mut LoadPublisher, // publisher: *mut LoadPublisher,
// active_slots: u64, // active_slots: u64,
// total_slots: u64, // total_slots: u64,
......
/target /target
python/dynemo/.*.so python/dynamo/.*.so
...@@ -957,7 +957,7 @@ dependencies = [ ...@@ -957,7 +957,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "dynemo-llm" name = "dynamo-llm"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
...@@ -972,7 +972,7 @@ dependencies = [ ...@@ -972,7 +972,7 @@ dependencies = [
"chrono", "chrono",
"cmake", "cmake",
"derive_builder", "derive_builder",
"dynemo-runtime", "dynamo-runtime",
"either", "either",
"erased-serde", "erased-serde",
"futures", "futures",
...@@ -1008,11 +1008,11 @@ dependencies = [ ...@@ -1008,11 +1008,11 @@ dependencies = [
] ]
[[package]] [[package]]
name = "dynemo-py3" name = "dynamo-py3"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"dynemo-llm", "dynamo-llm",
"dynemo-runtime", "dynamo-runtime",
"futures", "futures",
"once_cell", "once_cell",
"pyo3", "pyo3",
...@@ -1028,7 +1028,7 @@ dependencies = [ ...@@ -1028,7 +1028,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "dynemo-runtime" name = "dynamo-runtime"
version = "0.2.1" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
[package] [package]
name = "dynemo-py3" name = "dynamo-py3"
version = "0.2.1" version = "0.2.1"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
...@@ -30,8 +30,9 @@ crate-type = ["cdylib"] ...@@ -30,8 +30,9 @@ crate-type = ["cdylib"]
[dependencies] [dependencies]
dynemo-llm = { path = "../../llm", features = ["python"] }
dynemo-runtime = { path = "../../runtime" } dynamo-llm = { path = "../../llm", features = ["python"] }
dynamo-runtime = { path = "../../runtime" }
futures = "0.3" futures = "0.3"
once_cell = "1.20.3" once_cell = "1.20.3"
......
...@@ -41,7 +41,7 @@ source .venv/bin/activate ...@@ -41,7 +41,7 @@ source .venv/bin/activate
uv pip install maturin uv pip install maturin
``` ```
4. Build and install dynemo wheel 4. Build and install dynamo wheel
``` ```
maturin develop --uv maturin develop --uv
``` ```
......
...@@ -17,7 +17,7 @@ import asyncio ...@@ -17,7 +17,7 @@ import asyncio
import uvloop import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
uvloop.install() uvloop.install()
...@@ -29,7 +29,7 @@ class RequestHandler: ...@@ -29,7 +29,7 @@ class RequestHandler:
yield char yield char
@dynemo_worker() @dynamo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
component = runtime.namespace("examples/bls").component("bar") component = runtime.namespace("examples/bls").component("bar")
await component.create_service() await component.create_service()
......
...@@ -17,12 +17,12 @@ import asyncio ...@@ -17,12 +17,12 @@ import asyncio
import uvloop import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
uvloop.install() uvloop.install()
@dynemo_worker() @dynamo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
foo = ( foo = (
await runtime.namespace("examples/bls") await runtime.namespace("examples/bls")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment