Unverified commit 4ac510f4, authored by bnellnm, committed by GitHub
Browse files

[Kernels] Enable DeepGEMM by default (#24462)


Signed-off-by: Bill Nell <bnell@redhat.com>
parent 7fb2a5be
...@@ -135,7 +135,7 @@ if TYPE_CHECKING: ...@@ -135,7 +135,7 @@ if TYPE_CHECKING:
VLLM_TPU_BUCKET_PADDING_GAP: int = 0 VLLM_TPU_BUCKET_PADDING_GAP: int = 0
VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None
VLLM_TPU_USING_PATHWAYS: bool = False VLLM_TPU_USING_PATHWAYS: bool = False
VLLM_USE_DEEP_GEMM: bool = False VLLM_USE_DEEP_GEMM: bool = True
VLLM_USE_DEEP_GEMM_E8M0: bool = True VLLM_USE_DEEP_GEMM_E8M0: bool = True
VLLM_USE_DEEP_GEMM_E8M0_HOPPER: bool = False VLLM_USE_DEEP_GEMM_E8M0_HOPPER: bool = False
VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False
...@@ -1044,7 +1044,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1044,7 +1044,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Allow use of DeepGemm kernels for fused moe ops. # Allow use of DeepGemm kernels for fused moe ops.
"VLLM_USE_DEEP_GEMM": "VLLM_USE_DEEP_GEMM":
lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "0"))), lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "1"))),
# Whether to use E8M0 scaling when DeepGEMM is used on Blackwell GPUs. # Whether to use E8M0 scaling when DeepGEMM is used on Blackwell GPUs.
"VLLM_USE_DEEP_GEMM_E8M0": "VLLM_USE_DEEP_GEMM_E8M0":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment