Commit 3c74c91a authored by zhuwenwen
Browse files

set VLLM_USE_FUSED_QA_KVA_GEMM=1

parent a69f4902
...@@ -1344,7 +1344,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1344,7 +1344,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Only quantized DeepSeek models supported. # Only quantized DeepSeek models supported.
# Unquantized versions are not supported. # Unquantized versions are not supported.
"VLLM_USE_FUSED_QA_KVA_GEMM": "VLLM_USE_FUSED_QA_KVA_GEMM":
lambda: (os.environ.get("VLLM_USE_FUSED_QA_KVA_GEMM", "False").lower() in lambda: (os.environ.get("VLLM_USE_FUSED_QA_KVA_GEMM", "True").lower() in
("true", "1")), ("true", "1")),
"VLLM_ZERO_OVERHEAD_ENHANCE": "VLLM_ZERO_OVERHEAD_ENHANCE":
lambda: (os.getenv('VLLM_ZERO_OVERHEAD_ENHANCE', '0').lower() in lambda: (os.getenv('VLLM_ZERO_OVERHEAD_ENHANCE', '0').lower() in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment