Unverified commit f9ba6f5c, authored by Tanmay Verma and committed by GitHub
Browse files

feat: Publish events and metrics when using kv routing (#1262)

parent 4e47903b
......@@ -10,6 +10,7 @@ use anyhow::Context;
use regex::Regex;
use tokio::io::AsyncBufReadExt;
use crate::flags::RouterMode;
use dynamo_llm::engines::MultiNodeConfig;
use dynamo_llm::local_model::LocalModel;
use dynamo_runtime::protocols::Endpoint as EndpointId;
......@@ -51,6 +52,12 @@ pub async fn start(
"--context-length".to_string(),
card.context_length.to_string(),
];
// TRTLLM only
// The worker node will only publish events and metrics if the router mode is KV
if flags.router_mode == RouterMode::KV {
args.push("--publish-events-and-metrics".to_string());
}
// sglang only
// vllm uses CUDA_VISIBLE_DEVICES
if flags.base_gpu_id != 0 {
......
......@@ -178,7 +178,11 @@ async def init(runtime: DistributedRuntime, config: Config):
async with get_tensorrtllm_engine(engine_args) as engine:
endpoint = component.endpoint(config.endpoint)
await register_llm(
ModelType.Backend, endpoint, config.model_path, config.model_name
ModelType.Backend,
endpoint,
config.model_path,
config.model_name,
kv_cache_block_size=config.kv_block_size,
)
if config.publish_events_and_metrics:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment