"vscode:/vscode.git/clone" did not exist on "f8a08cb90dc0b5b45663cd2605d0c98c77efe009"
Commit ab674544 authored by zhuwenwen
Browse files

update Q/K/V_SCALE_CONSTANT

parent 39ff5a5a
......@@ -100,9 +100,9 @@ if TYPE_CHECKING:
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
VLLM_DISABLE_COMPILE_CACHE: bool = False
Q_SCALE_CONSTANT: int = 200
K_SCALE_CONSTANT: int = 200
V_SCALE_CONSTANT: int = 100
Q_SCALE_CONSTANT: int = 10
K_SCALE_CONSTANT: int = 10
V_SCALE_CONSTANT: int = 10
VLLM_SERVER_DEV_MODE: bool = False
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
VLLM_MLA_DISABLE: bool = False
......@@ -821,13 +821,13 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Divisor for dynamic query scale factor calculation for FP8 KV Cache
"Q_SCALE_CONSTANT":
lambda: int(os.getenv("Q_SCALE_CONSTANT", "200")),
lambda: int(os.getenv("Q_SCALE_CONSTANT", "10")),
# Divisor for dynamic key scale factor calculation for FP8 KV Cache
"K_SCALE_CONSTANT":
lambda: int(os.getenv("K_SCALE_CONSTANT", "200")),
lambda: int(os.getenv("K_SCALE_CONSTANT", "10")),
# Divisor for dynamic value scale factor calculation for FP8 KV Cache
"V_SCALE_CONSTANT":
lambda: int(os.getenv("V_SCALE_CONSTANT", "100")),
lambda: int(os.getenv("V_SCALE_CONSTANT", "10")),
# If set, enable multiprocessing in LLM for the V1 code path.
"VLLM_ENABLE_V1_MULTIPROCESSING":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment