"--http-port",type=int,default=8080,help="HTTP port for the engine (u16)."
"--http-port",type=int,default=8080,help="HTTP port for the engine (u16)."
)
)
# Register --router-mode: a string option restricted to the listed routing
# strategies; "round-robin" is used when the flag is omitted.
parser.add_argument(
    "--router-mode",
    help="How to route the request",
    default="round-robin",
    choices=["round-robin", "random", "kv"],
    type=str,
)
# Register --kv-overlap-score-weight: a float (1.0 when omitted) that, per
# its help text, weights the overlap score during KV-router worker selection.
parser.add_argument(
    "--kv-overlap-score-weight",
    help=(
        "KV Router: Weight for overlap score in worker selection. "
        "Higher values prioritize KV cache reuse."
    ),
    default=1.0,
    type=float,
)
# Register --router-temperature: a float (0.0 when omitted) described by its
# help text as the softmax temperature for KV-router worker sampling.
parser.add_argument(
    "--router-temperature",
    help=(
        "KV Router: Temperature for worker sampling via softmax. "
        "Higher values promote more randomness, and 0 fallbacks to deterministic."
    ),
    default=0.0,
    type=float,
)
parser.add_argument(
"--kv-events",
action="store_true",
dest="use_kv_events",
help=" KV Router: Whether to use KV events to maintain the view of cached blocks. If false, would use ApproxKvRouter for predicting block creation / deletion based only on incoming requests at a timer.",