Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e5f2ff72
Commit
e5f2ff72
authored
Feb 03, 2026
by
zhuwenwen
Browse files
skip aiter
parent
0386844b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
25 deletions
+24
-25
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+0
-1
vllm/v1/spec_decode/eagle.py
vllm/v1/spec_decode/eagle.py
+24
-24
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
e5f2ff72
...
...
@@ -1943,7 +1943,6 @@ class FusedMoE(CustomOp):
topk_weights
,
topk_ids
=
self
.
router
.
select_experts
(
hidden_states
=
x_orig
,
router_logits
=
router_logits
,
# use_fused_gate=use_fused_gate,
)
if
self
.
capture
is
not
None
:
...
...
vllm/v1/spec_decode/eagle.py
View file @
e5f2ff72
...
...
@@ -167,36 +167,36 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization.
self
.
allowed_attn_types
:
tuple
|
None
=
None
if
current_platform
.
is_rocm
():
from
vllm.v1.attention.backends.rocm_attn
import
RocmAttentionMetadata
#
if current_platform.is_rocm():
#
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types
=
[
TritonAttentionMetadata
,
RocmAttentionMetadata
,
]
# ROCM_AITER_FA is an optional backend
if
find_spec
(
AttentionBackendEnum
.
ROCM_AITER_FA
.
get_path
(
include_classname
=
False
)
):
from
vllm.v1.attention.backends.rocm_aiter_fa
import
(
AiterFlashAttentionMetadata
,
)
#
rocm_types = [
#
TritonAttentionMetadata,
#
RocmAttentionMetadata,
#
]
#
# ROCM_AITER_FA is an optional backend
#
if find_spec(
#
AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)
#
):
#
from vllm.v1.attention.backends.rocm_aiter_fa import (
#
AiterFlashAttentionMetadata,
#
)
rocm_types
.
append
(
AiterFlashAttentionMetadata
)
#
rocm_types.append(AiterFlashAttentionMetadata)
# TRITON_MLA backend support for MLA models (e.g., DeepSeek)
from
vllm.model_executor.layers.attention.mla_attention
import
(
MLACommonMetadata
,
)
#
# TRITON_MLA backend support for MLA models (e.g., DeepSeek)
#
from vllm.model_executor.layers.attention.mla_attention import (
#
MLACommonMetadata,
#
)
rocm_types
.
append
(
MLACommonMetadata
)
#
rocm_types.append(MLACommonMetadata)
# FlexAttention backend support
from
vllm.v1.attention.backends.flex_attention
import
FlexAttentionMetadata
#
# FlexAttention backend support
#
from vllm.v1.attention.backends.flex_attention import FlexAttentionMetadata
rocm_types
.
append
(
FlexAttentionMetadata
)
#
rocm_types.append(FlexAttentionMetadata)
self
.
allowed_attn_types
=
tuple
(
rocm_types
)
#
self.allowed_attn_types = tuple(rocm_types)
# Parse the speculative token tree.
spec_token_tree
=
self
.
speculative_config
.
speculative_token_tree
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment