Commit a4d28758 authored by zhuwenwen
Browse files

Update Q/K/V_SCALE_CONSTANT defaults (200/200/100 -> 10/10/10)

parent 04343d9d
...@@ -118,9 +118,9 @@ if TYPE_CHECKING: ...@@ -118,9 +118,9 @@ if TYPE_CHECKING:
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1 VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
VLLM_DISABLE_COMPILE_CACHE: bool = False VLLM_DISABLE_COMPILE_CACHE: bool = False
Q_SCALE_CONSTANT: int = 200 Q_SCALE_CONSTANT: int = 10
K_SCALE_CONSTANT: int = 200 K_SCALE_CONSTANT: int = 10
V_SCALE_CONSTANT: int = 100 V_SCALE_CONSTANT: int = 10
VLLM_SERVER_DEV_MODE: bool = False VLLM_SERVER_DEV_MODE: bool = False
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128 VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
VLLM_MLA_DISABLE: bool = False VLLM_MLA_DISABLE: bool = False
...@@ -1049,13 +1049,13 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1049,13 +1049,13 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Divisor for dynamic query scale factor calculation for FP8 KV Cache # Divisor for dynamic query scale factor calculation for FP8 KV Cache
"Q_SCALE_CONSTANT": "Q_SCALE_CONSTANT":
lambda: int(os.getenv("Q_SCALE_CONSTANT", "200")), lambda: int(os.getenv("Q_SCALE_CONSTANT", "10")),
# Divisor for dynamic key scale factor calculation for FP8 KV Cache # Divisor for dynamic key scale factor calculation for FP8 KV Cache
"K_SCALE_CONSTANT": "K_SCALE_CONSTANT":
lambda: int(os.getenv("K_SCALE_CONSTANT", "200")), lambda: int(os.getenv("K_SCALE_CONSTANT", "10")),
# Divisor for dynamic value scale factor calculation for FP8 KV Cache # Divisor for dynamic value scale factor calculation for FP8 KV Cache
"V_SCALE_CONSTANT": "V_SCALE_CONSTANT":
lambda: int(os.getenv("V_SCALE_CONSTANT", "100")), lambda: int(os.getenv("V_SCALE_CONSTANT", "10")),
# If set, enable multiprocessing in LLM for the V1 code path. # If set, enable multiprocessing in LLM for the V1 code path.
"VLLM_ENABLE_V1_MULTIPROCESSING": "VLLM_ENABLE_V1_MULTIPROCESSING":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment