Unverified Commit f9ba6f5c authored by Tanmay Verma, committed by GitHub
Browse files

feat: Publish events and metrics when using kv routing (#1262)

parent 4e47903b
...@@ -10,6 +10,7 @@ use anyhow::Context; ...@@ -10,6 +10,7 @@ use anyhow::Context;
use regex::Regex; use regex::Regex;
use tokio::io::AsyncBufReadExt; use tokio::io::AsyncBufReadExt;
use crate::flags::RouterMode;
use dynamo_llm::engines::MultiNodeConfig; use dynamo_llm::engines::MultiNodeConfig;
use dynamo_llm::local_model::LocalModel; use dynamo_llm::local_model::LocalModel;
use dynamo_runtime::protocols::Endpoint as EndpointId; use dynamo_runtime::protocols::Endpoint as EndpointId;
...@@ -51,6 +52,12 @@ pub async fn start( ...@@ -51,6 +52,12 @@ pub async fn start(
"--context-length".to_string(), "--context-length".to_string(),
card.context_length.to_string(), card.context_length.to_string(),
]; ];
// TRTLLM only
// The worker node will only publish events and metrics if the router mode is KV
if flags.router_mode == RouterMode::KV {
args.push("--publish-events-and-metrics".to_string());
}
// sglang only // sglang only
// vllm uses CUDA_VISIBLE_DEVICES // vllm uses CUDA_VISIBLE_DEVICES
if flags.base_gpu_id != 0 { if flags.base_gpu_id != 0 {
......
...@@ -178,7 +178,11 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -178,7 +178,11 @@ async def init(runtime: DistributedRuntime, config: Config):
async with get_tensorrtllm_engine(engine_args) as engine: async with get_tensorrtllm_engine(engine_args) as engine:
endpoint = component.endpoint(config.endpoint) endpoint = component.endpoint(config.endpoint)
await register_llm( await register_llm(
ModelType.Backend, endpoint, config.model_path, config.model_name ModelType.Backend,
endpoint,
config.model_path,
config.model_name,
kv_cache_block_size=config.kv_block_size,
) )
if config.publish_events_and_metrics: if config.publish_events_and_metrics:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment