Commit 73c10ae9 authored by Biswa Panda, committed by GitHub
Browse files

feat: add cli args for example http service (#221)


Co-authored-by: Biswa Ranjan Panda <biswaranjanp@nvidia.com>
parent 60a73634
......@@ -59,7 +59,7 @@ Run the server logging (with debug level logging):
```bash
TRD_LOG=DEBUG http
```
By default the server will run on port 9992.
By default the server will run on port 8080.
Add model to the server:
```bash
......@@ -116,7 +116,7 @@ The disaggregated deployment utilizes separate GPUs for prefill and decode opera
### 3. Client
```bash
curl localhost:9992/v1/chat/completions \
curl localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
......
......@@ -1127,6 +1127,7 @@ dependencies = [
name = "http"
version = "0.2.0"
dependencies = [
"clap",
"serde",
"serde_json",
"tokio",
......
......@@ -26,6 +26,7 @@ repository.workspace = true
[dependencies]
triton-distributed = { workspace = true}
triton-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true }
serde_json = { workspace = true }
......
......@@ -14,6 +14,8 @@
// limitations under the License.
use std::sync::Arc;
use clap::Parser;
use std::env;
use triton_distributed::{logging, DistributedRuntime, Result, Runtime, Worker};
use triton_llm::http::service::{
......@@ -21,6 +23,26 @@ use triton_llm::http::service::{
service_v2::HttpService,
};
// Command-line arguments for the example HTTP service, parsed with clap's
// derive API (`Args::parse()`).
//
// NOTE: the `///` doc comments on the fields below are picked up by clap as
// the `--help` text for each flag, so they are user-facing strings.
// Maintainer-only notes are written as plain `//` comments instead.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Host for the HTTP service
    #[arg(long, default_value = "0.0.0.0")]
    host: String,

    // `default_value_t` takes a typed value, so the default is checked against
    // `u16` at compile time instead of being supplied as a string that clap
    // has to parse at runtime.
    /// Port number for the HTTP service
    #[arg(short, long, default_value_t = 8080)]
    port: u16,

    /// Namespace for the distributed component
    #[arg(long, default_value = "public")]
    namespace: String,

    /// Component name for the service
    #[arg(long, default_value = "http")]
    component: String,
}
fn main() -> Result<()> {
logging::init();
let worker = Worker::from_settings()?;
......@@ -30,8 +52,13 @@ fn main() -> Result<()> {
async fn app(runtime: Runtime) -> Result<()> {
let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
let args = Args::parse();
// create the http service and acquire the model manager
let http_service = HttpService::builder().port(9992).build()?;
let http_service = HttpService::builder()
.port(args.port)
.host(args.host)
.build()?;
let manager = http_service.model_manager().clone();
// todo - use the IntoComponent trait to register the component
......@@ -42,7 +69,7 @@ async fn app(runtime: Runtime) -> Result<()> {
// written to etcd
// the cli when operating on an `http` component will validate the namespace.component is
// registered with HttpServiceComponentDefinition
let component = distributed.namespace("public")?.component("http")?;
let component = distributed.namespace(&args.namespace)?.component(&args.component)?;
let etcd_root = component.etcd_path();
let etcd_path = format!("{}/models/chat/", etcd_root);
......
......@@ -25,6 +25,7 @@ pub struct HttpService {
models: ModelManager,
router: axum::Router,
port: u16,
host: String,
}
#[derive(Clone, Builder)]
......@@ -33,6 +34,9 @@ pub struct HttpServiceConfig {
#[builder(default = "8787")]
port: u16,
#[builder(setter(into), default = "String::from(\"0.0.0.0\")")]
host: String,
// #[builder(default)]
// custom: Vec<axum::Router>
#[builder(default = "true")]
......@@ -57,7 +61,7 @@ impl HttpService {
}
pub async fn run(&self, cancel_token: CancellationToken) -> Result<()> {
let address = format!("0.0.0.0:{}", self.port);
let address = format!("{}:{}", self.host, self.port);
tracing::info!(address, "Starting HTTP service on: {address}");
let listener = tokio::net::TcpListener::bind(address.as_str())
......@@ -122,6 +126,7 @@ impl HttpServiceConfigBuilder {
models: model_manager,
router,
port: config.port,
host: config.host,
})
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment