Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub
Browse files

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
......@@ -22,8 +22,6 @@ from common.chat_processor import ChatProcessor, ProcessMixIn
from common.parser import parse_vllm_args
from common.protocol import MyRequestOutput, Tokens, vLLMGenerateRequest
from transformers import AutoTokenizer
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
from triton_distributed_rs._core import Client
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
......@@ -33,6 +31,13 @@ from vllm.logger import logger as vllm_logger
from vllm.outputs import RequestOutput
from vllm.transformers_utils.tokenizer import AnyTokenizer
from triton_distributed._core import Client
from triton_distributed.runtime import (
DistributedRuntime,
triton_endpoint,
triton_worker,
)
class Processor(ProcessMixIn):
"""
......
......@@ -21,13 +21,14 @@ from typing import AsyncIterator
import uvloop
from common.protocol import Tokens
from triton_distributed_rs import (
from vllm.logger import logger as vllm_logger
from triton_distributed.llm import KvRouter
from triton_distributed.runtime import (
DistributedRuntime,
KvRouter,
triton_endpoint,
triton_worker,
)
from vllm.logger import logger as vllm_logger
WorkerId = str
......
......@@ -22,11 +22,16 @@ import uvloop
from common.base_engine import BaseVllmEngine
from common.parser import parse_vllm_args
from common.protocol import MyRequestOutput, vLLMGenerateRequest
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.logger import logger as vllm_logger
from vllm.sampling_params import RequestOutputKind
from triton_distributed.runtime import (
DistributedRuntime,
triton_endpoint,
triton_worker,
)
vllm_logger.info(f"VLLM_KV_CAPI_PATH: {os.environ['VLLM_KV_CAPI_PATH']}")
......
......@@ -21,7 +21,6 @@ import uvloop
from common.base_engine import BaseVllmEngine
from common.chat_processor import ProcessMixIn
from common.parser import parse_vllm_args
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
......@@ -29,6 +28,12 @@ from vllm.entrypoints.openai.protocol import (
)
from vllm.logger import logger as vllm_logger
from triton_distributed.runtime import (
DistributedRuntime,
triton_endpoint,
triton_worker,
)
class VllmEngine(BaseVllmEngine, ProcessMixIn):
"""
......
......@@ -33,8 +33,8 @@ repository = "https://github.com/triton-inference-server/triton_distributed"
[workspace.dependencies]
# local or crates.io
triton-distributed = { path = "../../runtime/rust" }
triton-llm = { path = "../../llm/rust/triton-llm" }
triton-distributed-runtime = { path = "../../lib/runtime" }
triton-distributed-llm = { path = "../../lib/llm" }
# crates.io
anyhow = { version = "1" }
......
......@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace = true
[dependencies]
triton-distributed = { workspace = true }
triton-distributed-runtime = { workspace = true }
# third-party
......@@ -14,7 +14,7 @@
// limitations under the License.
use hello_world::DEFAULT_NAMESPACE;
use triton_distributed::{
use triton_distributed_runtime::{
logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
Runtime, Worker,
};
......
......@@ -15,7 +15,7 @@
use hello_world::DEFAULT_NAMESPACE;
use std::sync::Arc;
use triton_distributed::{
use triton_distributed_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
......
......@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed = { workspace = true}
triton-llm = { workspace = true}
triton-distributed-runtime = { workspace = true}
triton-distributed-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true }
......
......@@ -17,8 +17,8 @@ use std::sync::Arc;
use clap::Parser;
use std::env;
use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker};
use triton_llm::http::service::{
use triton_distributed_runtime::{logging, DistributedRuntime, Result, Runtime, Worker};
use triton_distributed_llm::http::service::{
discovery::{model_watcher, ModelWatchState},
service_v2::HttpService,
};
......
......@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed = { workspace = true}
triton-llm = { workspace = true}
triton-distributed-runtime = { workspace = true}
triton-distributed-llm = { workspace = true}
serde = { workspace = true }
serde_json = { workspace = true }
......
......@@ -16,11 +16,11 @@
use clap::{Parser, Subcommand};
use tracing as log;
use triton_distributed::{
use triton_distributed_runtime::{
distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
Result, Runtime, Worker,
};
use triton_llm::http::service::discovery::ModelEntry;
use triton_distributed_llm::http::service::discovery::ModelEntry;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
......
......@@ -23,7 +23,7 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed = { workspace = true }
triton-distributed-runtime = { workspace = true }
# third-party
futures = { workspace = true }
......
......@@ -16,7 +16,7 @@
use futures::StreamExt;
use service_metrics::DEFAULT_NAMESPACE;
use triton_distributed::{
use triton_distributed_runtime::{
logging,
protocols::annotated::Annotated,
utils::{stream, Duration, Instant},
......
......@@ -16,7 +16,7 @@
use service_metrics::DEFAULT_NAMESPACE;
use std::sync::Arc;
use triton_distributed::{
use triton_distributed_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
......
......@@ -14,7 +14,7 @@
# limitations under the License.
[package]
name = "libtriton-llm"
name = "libtriton-distributed-llm"
version = "0.1.1"
edition = "2021"
authors = ["NVIDIA"]
......@@ -23,15 +23,15 @@ homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
[lib]
name = "triton_llm_capi"
name = "triton_distributed_llm_capi"
crate-type = ["cdylib"]
[build-dependencies]
cbindgen = "0.27"
[dependencies]
triton-llm = { path = "../triton-llm" }
triton-distributed = { workspace = true }
triton-distributed-llm = { path = "../../llm" }
triton-distributed-runtime = { path = "../../runtime" }
anyhow = { version = "1" }
futures = "0.3"
......
......@@ -21,8 +21,8 @@ use std::sync::atomic::{AtomicU32, Ordering};
use tracing as log;
use uuid::Uuid;
use triton_distributed::{DistributedRuntime, Worker};
use triton_llm::kv_router::{
use triton_distributed_runtime::{DistributedRuntime, Worker};
use triton_distributed_llm::kv_router::{
indexer::compute_block_hash_for_seq, protocols::*, publisher::KvPublisher,
};
static WK: OnceCell<Worker> = OnceCell::new();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment