Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
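
In summary, this commit moves the Rust crates under lib/ and renames them (triton-distributed becomes triton-distributed-runtime, triton-llm becomes triton-distributed-llm), and the Python examples now import from the triton_distributed package instead of triton_distributed_rs. A minimal before/after sketch of the Python import change, using only module and symbol names that appear in the hunks below:

# Before this commit (removed lines in the diffs below):
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker

# After this commit (added lines in the diffs below):
from triton_distributed.runtime import DistributedRuntime, triton_endpoint, triton_worker
from triton_distributed.llm import KvRouter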
...
@@ -22,8 +22,6 @@ from common.chat_processor import ChatProcessor, ProcessMixIn
 from common.parser import parse_vllm_args
 from common.protocol import MyRequestOutput, Tokens, vLLMGenerateRequest
 from transformers import AutoTokenizer
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
-from triton_distributed_rs._core import Client
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
...
@@ -33,6 +31,13 @@ from vllm.logger import logger as vllm_logger
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.tokenizer import AnyTokenizer
+from triton_distributed._core import Client
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 class Processor(ProcessMixIn):
     """
...
...
@@ -21,13 +21,14 @@ from typing import AsyncIterator
 import uvloop
 from common.protocol import Tokens
-from triton_distributed_rs import (
+from vllm.logger import logger as vllm_logger
+
+from triton_distributed.llm import KvRouter
+from triton_distributed.runtime import (
     DistributedRuntime,
-    KvRouter,
     triton_endpoint,
     triton_worker,
 )
-from vllm.logger import logger as vllm_logger
 
 WorkerId = str
...
...
@@ -22,11 +22,16 @@ import uvloop
 from common.base_engine import BaseVllmEngine
 from common.parser import parse_vllm_args
 from common.protocol import MyRequestOutput, vLLMGenerateRequest
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.logger import logger as vllm_logger
 from vllm.sampling_params import RequestOutputKind
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 vllm_logger.info(f"VLLM_KV_CAPI_PATH: {os.environ['VLLM_KV_CAPI_PATH']}")
...
...
@@ -21,7 +21,6 @@ import uvloop
 from common.base_engine import BaseVllmEngine
 from common.chat_processor import ProcessMixIn
 from common.parser import parse_vllm_args
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
...
@@ -29,6 +28,12 @@ from vllm.entrypoints.openai.protocol import (
 )
 from vllm.logger import logger as vllm_logger
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 class VllmEngine(BaseVllmEngine, ProcessMixIn):
     """
...
...
@@ -33,8 +33,8 @@ repository = "https://github.com/triton-inference-server/triton_distributed"
 [workspace.dependencies]
 # local or crates.io
-triton-distributed = { path = "../../runtime/rust" }
-triton-llm = { path = "../../llm/rust/triton-llm" }
+triton-distributed-runtime = { path = "../../lib/runtime" }
+triton-distributed-llm = { path = "../../lib/llm" }
 
 # crates.io
 anyhow = { version = "1" }
...
...
@@ -22,6 +22,6 @@ license.workspace = true
 homepage.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true }
+triton-distributed-runtime = { workspace = true }
 
 # third-party
...
@@ -14,7 +14,7 @@
 // limitations under the License.
 
 use hello_world::DEFAULT_NAMESPACE;
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
     Runtime, Worker,
 };
...
...
@@ -15,7 +15,7 @@
 use hello_world::DEFAULT_NAMESPACE;
 use std::sync::Arc;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     pipeline::{
         async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...
...
@@ -24,8 +24,8 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true}
-triton-llm = { workspace = true}
+triton-distributed-runtime = { workspace = true}
+triton-distributed-llm = { workspace = true}
 
 clap = { version = "4.5", features = ["derive"] }
 serde = { workspace = true }
...
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
 
 use clap::Parser;
 use std::env;
-use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker};
-use triton_llm::http::service::{
+use triton_distributed_runtime::{logging, DistributedRuntime, Result, Runtime, Worker};
+use triton_distributed_llm::http::service::{
     discovery::{model_watcher, ModelWatchState},
     service_v2::HttpService,
 };
...
...
@@ -23,8 +23,8 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true}
-triton-llm = { workspace = true}
+triton-distributed-runtime = { workspace = true}
+triton-distributed-llm = { workspace = true}
 
 serde = { workspace = true }
 serde_json = { workspace = true }
...
...
@@ -16,11 +16,11 @@
 use clap::{Parser, Subcommand};
 use tracing as log;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
     Result, Runtime, Worker,
 };
-use triton_llm::http::service::discovery::ModelEntry;
+use triton_distributed_llm::http::service::discovery::ModelEntry;
 
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
...
...
@@ -23,7 +23,7 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true }
+triton-distributed-runtime = { workspace = true }
 
 # third-party
 futures = { workspace = true }
...
...
@@ -16,7 +16,7 @@
 use futures::StreamExt;
 use service_metrics::DEFAULT_NAMESPACE;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     protocols::annotated::Annotated,
     utils::{stream, Duration, Instant},
...
...
@@ -16,7 +16,7 @@
 use service_metrics::DEFAULT_NAMESPACE;
 use std::sync::Arc;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     pipeline::{
         async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...
...
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 [package]
-name = "libtriton-llm"
+name = "libtriton-distributed-llm"
 version = "0.1.1"
 edition = "2021"
 authors = ["NVIDIA"]
...
@@ -23,15 +23,15 @@ homepage = "https://github.com/triton-inference-server/triton_distributed"
 repository = "https://github.com/triton-inference-server/triton_distributed"
 
 [lib]
-name = "triton_llm_capi"
+name = "triton_distributed_llm_capi"
 crate-type = ["cdylib"]
 
 [build-dependencies]
 cbindgen = "0.27"
 
 [dependencies]
-triton-llm = { path = "../triton-llm" }
-triton-distributed = { workspace = true }
+triton-distributed-llm = { path = "../../llm" }
+triton-distributed-runtime = { path = "../../runtime" }
 anyhow = { version = "1" }
 futures = "0.3"
...
...
@@ -21,8 +21,8 @@ use std::sync::atomic::{AtomicU32, Ordering};
 use tracing as log;
 use uuid::Uuid;
-use triton_distributed::{DistributedRuntime, Worker};
-use triton_llm::kv_router::{
+use triton_distributed_runtime::{DistributedRuntime, Worker};
+use triton_distributed_llm::kv_router::{
     indexer::compute_block_hash_for_seq, protocols::*, publisher::KvPublisher,
 };
 
 static WK: OnceCell<Worker> = OnceCell::new();
...