Commit 5876ee95 authored by zhuwenwen
Browse files

remove conch-triton-kernels and skip AiterFlashAttentionMetadata

parent 185d5e7c
...@@ -17,4 +17,4 @@ setuptools>=77.0.3,<80.0.0 ...@@ -17,4 +17,4 @@ setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
runai-model-streamer==0.11.0 runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0 runai-model-streamer-s3==0.11.0
conch-triton-kernels==1.2.1 # conch-triton-kernels==1.2.1
...@@ -20,8 +20,8 @@ from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM ...@@ -20,8 +20,8 @@ from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import is_pin_memory_available from vllm.utils import is_pin_memory_available
from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
from vllm.v1.attention.backends.rocm_aiter_fa import ( # from vllm.v1.attention.backends.rocm_aiter_fa import (
AiterFlashAttentionMetadata) # AiterFlashAttentionMetadata)
from vllm.v1.attention.backends.tree_attn import (TreeAttentionMetadata, from vllm.v1.attention.backends.tree_attn import (TreeAttentionMetadata,
TreeAttentionMetadataBuilder) TreeAttentionMetadataBuilder)
from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata
...@@ -231,7 +231,7 @@ class EagleProposer: ...@@ -231,7 +231,7 @@ class EagleProposer:
if current_platform.is_rocm(): if current_platform.is_rocm():
assert isinstance( assert isinstance(
attn_metadata, attn_metadata,
(TritonAttentionMetadata, AiterFlashAttentionMetadata, (TritonAttentionMetadata, # AiterFlashAttentionMetadata,
FlashAttentionMetadata)) FlashAttentionMetadata))
else: else:
# Currently, only FlashAttention supports multi-token eagle spec # Currently, only FlashAttention supports multi-token eagle spec
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment