Commit 73c10ae9 authored by Biswa Panda, committed by GitHub
Browse files

feat: add cli args for example http service (#221)


Co-authored-by: Biswa Ranjan Panda <biswaranjanp@nvidia.com>
parent 60a73634
...@@ -59,7 +59,7 @@ Run the server logging (with debug level logging): ...@@ -59,7 +59,7 @@ Run the server logging (with debug level logging):
```bash ```bash
TRD_LOG=DEBUG http TRD_LOG=DEBUG http
``` ```
By default the server will run on port 9992. By default the server will run on port 8080.
Add model to the server: Add model to the server:
```bash ```bash
...@@ -116,7 +116,7 @@ The disaggregated deployment utilizes separate GPUs for prefill and decode opera ...@@ -116,7 +116,7 @@ The disaggregated deployment utilizes separate GPUs for prefill and decode opera
### 3. Client ### 3. Client
```bash ```bash
curl localhost:9992/v1/chat/completions \ curl localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
......
...@@ -1127,6 +1127,7 @@ dependencies = [ ...@@ -1127,6 +1127,7 @@ dependencies = [
name = "http" name = "http"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"clap",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
......
...@@ -26,6 +26,7 @@ repository.workspace = true ...@@ -26,6 +26,7 @@ repository.workspace = true
[dependencies] [dependencies]
triton-distributed = { workspace = true} triton-distributed = { workspace = true}
triton-llm = { workspace = true} triton-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
// limitations under the License. // limitations under the License.
use std::sync::Arc; use std::sync::Arc;
use clap::Parser;
use std::env;
use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker}; use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker};
use triton_llm::http::service::{ use triton_llm::http::service::{
...@@ -21,6 +23,26 @@ use triton_llm::http::service::{ ...@@ -21,6 +23,26 @@ use triton_llm::http::service::{
service_v2::HttpService, service_v2::HttpService,
}; };
// Command-line options for the example HTTP service binary.
//
// NOTE: the `///` comments on the fields are consumed by clap's derive macro and shown
// as `--help` text, so treat them as user-facing strings. Maintainer notes are written
// as plain `//` comments so the generated help output stays unchanged.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    // Passed to the `HttpService` builder as the bind host.
    /// Host for the HTTP service
    #[arg(long, default_value = "0.0.0.0")]
    host: String,

    // Typed default (`default_value_t`): the literal is checked against `u16` at compile
    // time instead of being parsed from a string when the program starts.
    /// Port number for the HTTP service
    #[arg(short, long, default_value_t = 8080)]
    port: u16,

    // Used together with `component` to look up the distributed component.
    /// Namespace for the distributed component
    #[arg(long, default_value = "public")]
    namespace: String,

    /// Component name for the service
    #[arg(long, default_value = "http")]
    component: String,
}
fn main() -> Result<()> { fn main() -> Result<()> {
logging::init(); logging::init();
let worker = Worker::from_settings()?; let worker = Worker::from_settings()?;
...@@ -30,8 +52,13 @@ fn main() -> Result<()> { ...@@ -30,8 +52,13 @@ fn main() -> Result<()> {
async fn app(runtime: Runtime) -> Result<()> { async fn app(runtime: Runtime) -> Result<()> {
let distributed = DistributedRuntime::from_settings(runtime.clone()).await?; let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
let args = Args::parse();
// create the http service and acquire the model manager // create the http service and acquire the model manager
let http_service = HttpService::builder().port(9992).build()?; let http_service = HttpService::builder()
.port(args.port)
.host(args.host)
.build()?;
let manager = http_service.model_manager().clone(); let manager = http_service.model_manager().clone();
// todo - use the IntoComponent trait to register the component // todo - use the IntoComponent trait to register the component
...@@ -42,7 +69,7 @@ async fn app(runtime: Runtime) -> Result<()> { ...@@ -42,7 +69,7 @@ async fn app(runtime: Runtime) -> Result<()> {
// written to etcd // written to etcd
// the cli when operating on an `http` component will validate the namespace.component is // the cli when operating on an `http` component will validate the namespace.component is
// registered with HttpServiceComponentDefinition // registered with HttpServiceComponentDefinition
let component = distributed.namespace("public")?.component("http")?; let component = distributed.namespace(&args.namespace)?.component(&args.component)?;
let etcd_root = component.etcd_path(); let etcd_root = component.etcd_path();
let etcd_path = format!("{}/models/chat/", etcd_root); let etcd_path = format!("{}/models/chat/", etcd_root);
......
...@@ -25,6 +25,7 @@ pub struct HttpService { ...@@ -25,6 +25,7 @@ pub struct HttpService {
models: ModelManager, models: ModelManager,
router: axum::Router, router: axum::Router,
port: u16, port: u16,
host: String,
} }
#[derive(Clone, Builder)] #[derive(Clone, Builder)]
...@@ -33,6 +34,9 @@ pub struct HttpServiceConfig { ...@@ -33,6 +34,9 @@ pub struct HttpServiceConfig {
#[builder(default = "8787")] #[builder(default = "8787")]
port: u16, port: u16,
#[builder(setter(into), default = "String::from(\"0.0.0.0\")")]
host: String,
// #[builder(default)] // #[builder(default)]
// custom: Vec<axum::Router> // custom: Vec<axum::Router>
#[builder(default = "true")] #[builder(default = "true")]
...@@ -57,7 +61,7 @@ impl HttpService { ...@@ -57,7 +61,7 @@ impl HttpService {
} }
pub async fn run(&self, cancel_token: CancellationToken) -> Result<()> { pub async fn run(&self, cancel_token: CancellationToken) -> Result<()> {
let address = format!("0.0.0.0:{}", self.port); let address = format!("{}:{}", self.host, self.port);
tracing::info!(address, "Starting HTTP service on: {address}"); tracing::info!(address, "Starting HTTP service on: {address}");
let listener = tokio::net::TcpListener::bind(address.as_str()) let listener = tokio::net::TcpListener::bind(address.as_str())
...@@ -122,6 +126,7 @@ impl HttpServiceConfigBuilder { ...@@ -122,6 +126,7 @@ impl HttpServiceConfigBuilder {
models: model_manager, models: model_manager,
router, router,
port: config.port, port: config.port,
host: config.host,
}) })
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment