Commit 08fcd7e9 authored by Neelay Shah, committed by GitHub

refactor: move libs to lib dir


Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent 0bfd9a76
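
In summary, this commit moves the Rust crates under lib/ and renames them (triton-distributed becomes triton-distributed-runtime, triton-llm becomes triton-distributed-llm), and the Python examples now import from the triton_distributed package instead of triton_distributed_rs. A minimal before/after sketch of the Python import change, using only module and symbol names that appear in the hunks below:

# Before this commit (removed lines in the diffs below):
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker

# After this commit (added lines in the diffs below):
from triton_distributed.runtime import DistributedRuntime, triton_endpoint, triton_worker
from triton_distributed.llm import KvRouter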
...
@@ -22,8 +22,6 @@ from common.chat_processor import ChatProcessor, ProcessMixIn
 from common.parser import parse_vllm_args
 from common.protocol import MyRequestOutput, Tokens, vLLMGenerateRequest
 from transformers import AutoTokenizer
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
-from triton_distributed_rs._core import Client
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
...
@@ -33,6 +31,13 @@ from vllm.logger import logger as vllm_logger
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.tokenizer import AnyTokenizer
+from triton_distributed._core import Client
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 class Processor(ProcessMixIn):
     """
...
...
@@ -21,13 +21,14 @@ from typing import AsyncIterator
 import uvloop
 from common.protocol import Tokens
-from triton_distributed_rs import (
+from vllm.logger import logger as vllm_logger
+
+from triton_distributed.llm import KvRouter
+from triton_distributed.runtime import (
     DistributedRuntime,
-    KvRouter,
     triton_endpoint,
     triton_worker,
 )
-from vllm.logger import logger as vllm_logger
 
 WorkerId = str
...
...
@@ -22,11 +22,16 @@ import uvloop
 from common.base_engine import BaseVllmEngine
 from common.parser import parse_vllm_args
 from common.protocol import MyRequestOutput, vLLMGenerateRequest
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.logger import logger as vllm_logger
 from vllm.sampling_params import RequestOutputKind
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 vllm_logger.info(f"VLLM_KV_CAPI_PATH: {os.environ['VLLM_KV_CAPI_PATH']}")
...
...
@@ -21,7 +21,6 @@ import uvloop
 from common.base_engine import BaseVllmEngine
 from common.chat_processor import ProcessMixIn
 from common.parser import parse_vllm_args
-from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionRequest,
...
@@ -29,6 +28,12 @@ from vllm.entrypoints.openai.protocol import (
 )
 from vllm.logger import logger as vllm_logger
+from triton_distributed.runtime import (
+    DistributedRuntime,
+    triton_endpoint,
+    triton_worker,
+)
 
 class VllmEngine(BaseVllmEngine, ProcessMixIn):
     """
...
...
@@ -33,8 +33,8 @@ repository = "https://github.com/triton-inference-server/triton_distributed"
 [workspace.dependencies]
 # local or crates.io
-triton-distributed = { path = "../../runtime/rust" }
-triton-llm = { path = "../../llm/rust/triton-llm" }
+triton-distributed-runtime = { path = "../../lib/runtime" }
+triton-distributed-llm = { path = "../../lib/llm" }
 
 # crates.io
 anyhow = { version = "1" }
...
...
@@ -22,6 +22,6 @@ license.workspace = true
 homepage.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true }
+triton-distributed-runtime = { workspace = true }
 
 # third-party
...
@@ -14,7 +14,7 @@
 // limitations under the License.
 
 use hello_world::DEFAULT_NAMESPACE;
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
     Runtime, Worker,
 };
...
...
@@ -15,7 +15,7 @@
 use hello_world::DEFAULT_NAMESPACE;
 use std::sync::Arc;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     pipeline::{
         async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...
...
@@ -24,8 +24,8 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true}
-triton-llm = { workspace = true}
+triton-distributed-runtime = { workspace = true}
+triton-distributed-llm = { workspace = true}
 
 clap = { version = "4.5", features = ["derive"] }
 serde = { workspace = true }
...
...
@@ -17,8 +17,8 @@ use std::sync::Arc;
 
 use clap::Parser;
 use std::env;
-use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker};
-use triton_llm::http::service::{
+use triton_distributed_runtime::{logging, DistributedRuntime, Result, Runtime, Worker};
+use triton_distributed_llm::http::service::{
     discovery::{model_watcher, ModelWatchState},
     service_v2::HttpService,
 };
...
...
@@ -23,8 +23,8 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true}
-triton-llm = { workspace = true}
+triton-distributed-runtime = { workspace = true}
+triton-distributed-llm = { workspace = true}
 
 serde = { workspace = true }
 serde_json = { workspace = true }
...
...
@@ -16,11 +16,11 @@
 use clap::{Parser, Subcommand};
 use tracing as log;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
     Result, Runtime, Worker,
 };
-use triton_llm::http::service::discovery::ModelEntry;
+use triton_distributed_llm::http::service::discovery::ModelEntry;
 
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
...
...
@@ -23,7 +23,7 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-triton-distributed = { workspace = true }
+triton-distributed-runtime = { workspace = true }
 
 # third-party
 futures = { workspace = true }
...
...
@@ -16,7 +16,7 @@
 use futures::StreamExt;
 use service_metrics::DEFAULT_NAMESPACE;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     protocols::annotated::Annotated,
     utils::{stream, Duration, Instant},
...
...
@@ -16,7 +16,7 @@
 use service_metrics::DEFAULT_NAMESPACE;
 use std::sync::Arc;
 
-use triton_distributed::{
+use triton_distributed_runtime::{
     logging,
     pipeline::{
         async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...
...
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 [package]
-name = "libtriton-llm"
+name = "libtriton-distributed-llm"
 version = "0.1.1"
 edition = "2021"
 authors = ["NVIDIA"]
...
@@ -23,15 +23,15 @@ homepage = "https://github.com/triton-inference-server/triton_distributed"
 repository = "https://github.com/triton-inference-server/triton_distributed"
 
 [lib]
-name = "triton_llm_capi"
+name = "triton_distributed_llm_capi"
 crate-type = ["cdylib"]
 
 [build-dependencies]
 cbindgen = "0.27"
 
 [dependencies]
-triton-llm = { path = "../triton-llm" }
-triton-distributed = { workspace = true }
+triton-distributed-llm = { path = "../../llm" }
+triton-distributed-runtime = { path = "../../runtime" }
 anyhow = { version = "1" }
 futures = "0.3"
...
...
@@ -21,8 +21,8 @@ use std::sync::atomic::{AtomicU32, Ordering};
 use tracing as log;
 use uuid::Uuid;
-use triton_distributed::{DistributedRuntime, Worker};
-use triton_llm::kv_router::{
+use triton_distributed_runtime::{DistributedRuntime, Worker};
+use triton_distributed_llm::kv_router::{
     indexer::compute_block_hash_for_seq, protocols::*, publisher::KvPublisher,
 };
 
 static WK: OnceCell<Worker> = OnceCell::new();
...