Commit 3c74c91a authored by zhuwenwen's avatar zhuwenwen
Browse files

Set VLLM_USE_FUSED_QA_KVA_GEMM=1 by default

parent a69f4902
......@@ -1344,7 +1344,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Only quantized DeepSeek models supported.
# Unquantized versions are not supported.
"VLLM_USE_FUSED_QA_KVA_GEMM":
-lambda: (os.environ.get("VLLM_USE_FUSED_QA_KVA_GEMM", "False").lower() in
+lambda: (os.environ.get("VLLM_USE_FUSED_QA_KVA_GEMM", "True").lower() in
("true", "1")),
"VLLM_ZERO_OVERHEAD_ENHANCE":
lambda: (os.getenv('VLLM_ZERO_OVERHEAD_ENHANCE', '0').lower() in
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment