Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cfad6a50
Commit
cfad6a50
authored
Apr 01, 2026
by
khluu
Browse files
Revert "[Bugfix] Restrict TRTLLM attention to SM100, fixing GB300 (SM103) hang (#38730)"
This reverts commit c284a667.
parent
c284a667
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 10 additions and 14 deletions
+10
-14
docs/design/attention_backends.md
docs/design/attention_backends.md
+1
-1
tools/pre_commit/generate_attention_backend_docs.py
tools/pre_commit/generate_attention_backend_docs.py
+5
-9
vllm/utils/flashinfer.py
vllm/utils/flashinfer.py
+4
-4
No files found.
docs/design/attention_backends.md
View file @
cfad6a50
...
...
@@ -167,7 +167,7 @@ Priority is **1 = highest** (tried first).
| ------- | ------- | ------ | --------- | ----------- | ---------- | ---- | --------- | --- | --------------- | ------------ |
| `CPU_ATTN` | | fp16, bf16, fp32 | `auto` | Any | 32, 64, 80, 96, 112, 128, 160, 192, 224, 256 | ❌ | ❌ | ❌ | All | N/A |
| `FLASHINFER` | Native† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256 | ❌ | ❌ | ✅ | Decoder | 7.x-9.x |
| `FLASHINFER` | TRTLLM† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256 | ✅ | ❌ | ✅ | Decoder | 10.0 |
| `FLASHINFER` | TRTLLM† | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | 16, 32, 64 | 64, 128, 256 | ✅ | ❌ | ✅ | Decoder | 10.x |
| `FLASH_ATTN` | FA2* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ✅ | All | ≥8.0 |
| `FLASH_ATTN` | FA3* | fp16, bf16 | `auto`, `float16`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | Any | ✅ | ❌ | ✅ | All | 9.x |
| `FLASH_ATTN` | FA4* | fp16, bf16 | `auto`, `float16`, `bfloat16` | %16 | Any | ❌ | ❌ | ✅ | All | ≥10.0 |
...
...
tools/pre_commit/generate_attention_backend_docs.py
View file @
cfad6a50
...
...
@@ -235,11 +235,10 @@ def _resolve_import_to_file(
def
_find_cc_in_function
(
tree
:
ast
.
AST
,
func_name
:
str
)
->
str
|
None
:
"""Find a compute capability from is_device_capability
*
() calls in a function.
"""Find a compute capability from is_device_capability
_family
() calls in a function.
Handles two patterns:
- is_device_capability_family(N): "M.x" (e.g. 100 -> "10.x")
- is_device_capability(N): "M.m" (e.g. 100 -> "10.0")
Looks for the pattern: current_platform.is_device_capability_family(N)
and converts N (e.g. 100) to a CC string (e.g. "10.x").
"""
for
node
in
ast
.
walk
(
tree
):
if
not
isinstance
(
node
,
ast
.
FunctionDef
)
or
node
.
name
!=
func_name
:
...
...
@@ -248,15 +247,12 @@ def _find_cc_in_function(tree: ast.AST, func_name: str) -> str | None:
if
(
isinstance
(
n
,
ast
.
Call
)
and
isinstance
(
n
.
func
,
ast
.
Attribute
)
and
n
.
func
.
attr
==
"is_device_capability_family"
and
n
.
args
and
isinstance
(
n
.
args
[
0
],
ast
.
Constant
)
and
isinstance
(
n
.
args
[
0
].
value
,
int
)
):
val
=
n
.
args
[
0
].
value
if
n
.
func
.
attr
==
"is_device_capability_family"
:
return
f
"
{
val
//
10
}
.x"
elif
n
.
func
.
attr
==
"is_device_capability"
:
return
f
"
{
val
//
10
}
.
{
val
%
10
}
"
return
f
"
{
n
.
args
[
0
].
value
//
10
}
.x"
return
None
...
...
vllm/utils/flashinfer.py
View file @
cfad6a50
...
...
@@ -289,10 +289,10 @@ def supports_trtllm_attention() -> bool:
if
envs
.
VLLM_BATCH_INVARIANT
:
return
False
#
TRTLLM attention is currently only validated on SM100 (CC 10.0).
# SM103 (GB300) hangs with FlashInfer >= 0.6.7.
# See: https://github.com/flashinfer-ai/flashinfer/issues/2939
return
current_platform
.
is_device_capability
(
100
)
and
has_nvidia_artifactory
(
)
#
Requires SM100 and NVIDIA artifactory to be accessible to download cubins
return
(
current_platform
.
is_device_capability_family
(
100
)
and
has_nvidia_artifactory
()
)
def
force_use_trtllm_attention
()
->
bool
|
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.