Update Q/K/V_SCALE_CONSTANT defaults to 10 (previously 200/200/100)

ab674544 · zhuwenwen · 39ff5a5a · ab674544
Commit ab674544 authored Feb 11, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 6 deletions

vllm/envs.py vllm/envs.py +6 -6

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -100,9 +100,9 @@ if TYPE_CHECKING:
    VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
    VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
    VLLM_DISABLE_COMPILE_CACHE: bool = False
-    Q_SCALE_CONSTANT: int = 200
-    K_SCALE_CONSTANT: int = 200
-    V_SCALE_CONSTANT: int = 100
+    Q_SCALE_CONSTANT: int = 10
+    K_SCALE_CONSTANT: int = 10
+    V_SCALE_CONSTANT: int = 10
    VLLM_SERVER_DEV_MODE: bool = False
    VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
    VLLM_MLA_DISABLE: bool = False
@@ -821,13 +821,13 @@ environment_variables: dict[str, Callable[[], Any]] = {

    # Divisor for dynamic query scale factor calculation for FP8 KV Cache
    "Q_SCALE_CONSTANT":
-    lambda: int(os.getenv("Q_SCALE_CONSTANT", "200")),
+    lambda: int(os.getenv("Q_SCALE_CONSTANT", "10")),
    # Divisor for dynamic key scale factor calculation for FP8 KV Cache
    "K_SCALE_CONSTANT":
-    lambda: int(os.getenv("K_SCALE_CONSTANT", "200")),
+    lambda: int(os.getenv("K_SCALE_CONSTANT", "10")),
    # Divisor for dynamic value scale factor calculation for FP8 KV Cache
    "V_SCALE_CONSTANT":
-    lambda: int(os.getenv("V_SCALE_CONSTANT", "100")),
+    lambda: int(os.getenv("V_SCALE_CONSTANT", "10")),

    # If set, enable multiprocessing in LLM for the V1 code path.
    "VLLM_ENABLE_V1_MULTIPROCESSING":