Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1cbccb6d
Unverified
Commit
1cbccb6d
authored
Jan 27, 2026
by
Matthew Bonanni
Committed by
GitHub
Jan 27, 2026
Browse files
[Attention] Use `has_flashinfer` helper (#33177)
Signed-off-by:
Matthew Bonanni
<
mbonanni@redhat.com
>
parent
bd92089d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
11 deletions
+4
-11
vllm/model_executor/layers/attention/mla_attention.py
vllm/model_executor/layers/attention/mla_attention.py
+4
-11
No files found.
vllm/model_executor/layers/attention/mla_attention.py
View file @
1cbccb6d
...
@@ -229,7 +229,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
...
@@ -229,7 +229,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
get_and_maybe_dequant_weights
,
get_and_maybe_dequant_weights
,
)
)
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils.flashinfer
import
has_nvidia_artifactory
from
vllm.utils.flashinfer
import
has_flashinfer
,
has_nvidia_artifactory
from
vllm.utils.math_utils
import
cdiv
,
round_down
from
vllm.utils.math_utils
import
cdiv
,
round_down
from
vllm.utils.torch_utils
import
(
from
vllm.utils.torch_utils
import
(
direct_register_custom_op
,
direct_register_custom_op
,
...
@@ -599,13 +599,6 @@ except ImportError:
...
@@ -599,13 +599,6 @@ except ImportError:
is_vllm_fa
=
False
is_vllm_fa
=
False
@
functools
.
cache
def
flashinfer_available
()
->
bool
:
import
importlib.util
return
importlib
.
util
.
find_spec
(
"flashinfer"
)
is
not
None
def
dynamic_per_batched_tensor_quant
(
def
dynamic_per_batched_tensor_quant
(
x
:
torch
.
Tensor
,
dtype
:
torch
.
dtype
=
torch
.
float8_e4m3fn
x
:
torch
.
Tensor
,
dtype
:
torch
.
dtype
=
torch
.
float8_e4m3fn
):
):
...
@@ -824,7 +817,7 @@ def use_flashinfer_prefill() -> bool:
...
@@ -824,7 +817,7 @@ def use_flashinfer_prefill() -> bool:
vllm_config
=
get_current_vllm_config
()
vllm_config
=
get_current_vllm_config
()
if
not
(
if
not
(
not
vllm_config
.
attention_config
.
disable_flashinfer_prefill
not
vllm_config
.
attention_config
.
disable_flashinfer_prefill
and
flashinfer
_available
()
and
has_
flashinfer
()
and
not
vllm_config
.
attention_config
.
use_cudnn_prefill
and
not
vllm_config
.
attention_config
.
use_cudnn_prefill
and
current_platform
.
is_device_capability_family
(
100
)
and
current_platform
.
is_device_capability_family
(
100
)
):
):
...
@@ -838,7 +831,7 @@ def use_cudnn_prefill() -> bool:
...
@@ -838,7 +831,7 @@ def use_cudnn_prefill() -> bool:
vllm_config
=
get_current_vllm_config
()
vllm_config
=
get_current_vllm_config
()
return
(
return
(
flashinfer
_available
()
has_
flashinfer
()
and
vllm_config
.
attention_config
.
use_cudnn_prefill
and
vllm_config
.
attention_config
.
use_cudnn_prefill
and
current_platform
.
is_device_capability_family
(
100
)
and
current_platform
.
is_device_capability_family
(
100
)
and
has_nvidia_artifactory
()
and
has_nvidia_artifactory
()
...
@@ -851,7 +844,7 @@ def use_trtllm_ragged_deepseek_prefill() -> bool:
...
@@ -851,7 +844,7 @@ def use_trtllm_ragged_deepseek_prefill() -> bool:
vllm_config
=
get_current_vllm_config
()
vllm_config
=
get_current_vllm_config
()
if
not
(
if
not
(
flashinfer
_available
has_
flashinfer
()
and
vllm_config
.
attention_config
.
use_trtllm_ragged_deepseek_prefill
and
vllm_config
.
attention_config
.
use_trtllm_ragged_deepseek_prefill
and
current_platform
.
is_device_capability_family
(
100
)
and
current_platform
.
is_device_capability_family
(
100
)
):
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment