Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bbbd696a
Unverified
Commit
bbbd696a
authored
Jan 23, 2026
by
Luka Govedič
Committed by
GitHub
Jan 23, 2026
Browse files
[torch.compile][CI] Add back attn fusion on hopper/ada (#32940)
Signed-off-by:
Luka Govedič
<
lgovedic@redhat.com
>
parent
9b77bb79
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
5 deletions
+4
-5
tests/compile/test_fusion_attn.py
tests/compile/test_fusion_attn.py
+4
-5
No files found.
tests/compile/test_fusion_attn.py
View file @
bbbd696a
...
...
@@ -18,7 +18,7 @@ from tests.compile.fusion_test_utils import (
is_blackwell
,
run_model
,
)
from
tests.utils
import
cuda_device_count_stateless
,
flat_product
from
tests.utils
import
flat_product
from
tests.v1.attention.utils
import
BatchSpec
,
create_common_attn_metadata
from
vllm._custom_ops
import
cutlass_scaled_fp4_mm
,
scaled_fp4_quant
from
vllm.attention.layer
import
Attention
...
...
@@ -265,13 +265,13 @@ if current_platform.is_cuda():
HEADS
=
[(
64
,
8
),
(
40
,
8
)]
PATTERN_TEST_MODELS_FP8
=
[
(
"
nvidia/Llama-4-Scout-17B-16E-Instruct
-FP8"
,
"
RedHatAI/Meta-Llama-3.1-8B
-FP8"
,
TestAttentionFp8StaticQuantPatternModel
,
)
]
PATTERN_TEST_MODELS_FP4
=
[
(
"nvidia/Llama-
4-Scout-17B-16E
-Instruct-FP4"
,
"nvidia/Llama-
3.1-8B
-Instruct-
NV
FP4"
,
TestAttentionNvfp4QuantPatternModel
,
)
]
...
...
@@ -331,9 +331,8 @@ def test_attention_quant_pattern(
if
backend
==
AttentionBackendEnum
.
FLASHINFER
and
(
not
current_platform
.
is_device_capability
((
10
,
0
))
or
not
has_flashinfer
()
):
# This also captures the FP4 case
pytest
.
skip
(
"FlashInfer attn fusion requires Blackwell and flashinfer"
)
if
"Llama-4-Scout"
in
model_name
and
cuda_device_count_stateless
()
<
2
:
pytest
.
skip
(
"Llama-4-Scout requires at least 2 GPUs"
)
custom_ops_list
=
custom_ops
.
split
(
","
)
if
custom_ops
else
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment