Unverified commit c65f1ac0, authored by Vladislav Nosivskoy, committed by GitHub
Browse files

fix: respect router env vars in frontend configuration (#4431)


Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com>
parent 81491b39
...@@ -78,7 +78,10 @@ def parse_args(): ...@@ -78,7 +78,10 @@ def parse_args():
"-i", "--interactive", action="store_true", help="Interactive text chat" "-i", "--interactive", action="store_true", help="Interactive text chat"
) )
parser.add_argument( parser.add_argument(
"--kv-cache-block-size", type=int, help="KV cache block size (u32)." "--kv-cache-block-size",
type=int,
default=os.environ.get("DYN_KV_CACHE_BLOCK_SIZE"),
help="KV cache block size (u32). Can be set via DYN_KV_CACHE_BLOCK_SIZE env var.",
) )
parser.add_argument( parser.add_argument(
"--http-host", "--http-host",
...@@ -114,20 +117,20 @@ def parse_args(): ...@@ -114,20 +117,20 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--kv-overlap-score-weight", "--kv-overlap-score-weight",
type=float, type=float,
default=1.0, default=float(os.environ.get("DYN_KV_OVERLAP_SCORE_WEIGHT", "1.0")),
help="KV Router: Weight for overlap score in worker selection. Higher values prioritize KV cache reuse.", help="KV Router: Weight for overlap score in worker selection. Higher values prioritize KV cache reuse.",
) )
parser.add_argument( parser.add_argument(
"--router-temperature", "--router-temperature",
type=float, type=float,
default=0.0, default=float(os.environ.get("DYN_ROUTER_TEMPERATURE", "0.0")),
help="KV Router: Temperature for worker sampling via softmax. Higher values promote more randomness, and 0 fallbacks to deterministic.", help="KV Router: Temperature for worker sampling via softmax. Higher values promote more randomness, and 0 fallbacks to deterministic.",
) )
parser.add_argument( parser.add_argument(
"--no-kv-events", "--no-kv-events",
action="store_false", action="store_false",
dest="use_kv_events", dest="use_kv_events",
default=True, default=os.environ.get("DYN_KV_EVENTS", "true").lower() != "false",
help="KV Router: Disable KV events. When set, uses ApproxKvRouter for predicting block creation/deletion based only on incoming requests at a timer. By default, KV events are enabled.", help="KV Router: Disable KV events. When set, uses ApproxKvRouter for predicting block creation/deletion based only on incoming requests at a timer. By default, KV events are enabled.",
) )
parser.add_argument( parser.add_argument(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment