"src/vscode:/vscode.git/clone" did not exist on "3a237f4fa25dd5a3f354428f0e2e869d08089dc7"
Unverified commit a1816187, authored by weiliang, committed by GitHub
Browse files

Fix Flashinfer Backend for SM120 Usage (#12325)

parent e39628fd
......@@ -26,8 +26,8 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMo
from sglang.srt.speculative.spec_info import SpecInput
from sglang.srt.utils import (
get_int_env_var,
- is_blackwell_supported,
is_flashinfer_available,
+ is_sm100_supported,
next_power_of_2,
)
......@@ -229,7 +229,7 @@ class FlashInferAttnBackend(AttentionBackend):
]
fmha_backend = "auto"
- if is_blackwell_supported():
+ if is_sm100_supported():
# Disable CUTLASS backend when piecewise cuda graph is enabled
# due to TMA descriptor initialization issues on B200
if model_runner.server_args.enable_piecewise_cuda_graph:
......
......@@ -25,8 +25,8 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMo
from sglang.srt.server_args import get_global_server_args
from sglang.srt.speculative.spec_info import SpecInput
from sglang.srt.utils import (
- is_blackwell_supported,
is_flashinfer_available,
+ is_sm100_supported,
next_power_of_2,
)
......@@ -242,9 +242,11 @@ class FlashInferMLAAttnBackend(AttentionBackend):
else:
self.q_indptr_decode = q_indptr_decode_buf
self.fmha_backend = "auto"
- if is_blackwell_supported():
+ if is_sm100_supported():
self.fmha_backend = "cutlass"
+ else:
+ self.fmha_backend = "auto"
self.prefill_wrapper_ragged = BatchPrefillWithRaggedKVCacheWrapper(
self.workspace_buffer, "NHD", backend=self.fmha_backend
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment