help="Load balancing policy to use: random, round_robin, or approx_tree"
)]
policy:PolicyType,
// Optional tokenizer file location, exposed as a long-form CLI flag.
// Constraints declared below:
//   * `requires = "policy"`      - passing this flag demands that `policy`
//                                  is supplied as well.
//   * `required_if_eq(...)`      - the flag becomes mandatory whenever
//                                  `policy` equals "approx_tree".
// NOTE(review): plain `//` comments are used on purpose; a `///` doc comment
// would be picked up by clap derive as help text.
#[arg(
    long,
    requires = "policy",
    required_if_eq("policy", "approx_tree"),
    help = "Path to the tokenizer file, required when using approx_tree policy"
)]
tokenizer_path: Option<String>,
#[arg(
long,
default_value="0.50",
requires="policy",
required_if_eq("policy","approx_tree"),
help="Cache threshold (0.0-1.0) for approx_tree routing. Routes to cached worker if match rate exceeds threshold, otherwise routes to shortest queue worker"