Commit 5876ee95 authored by zhuwenwen
Browse files

remove conch-triton-kernels and skip AiterFlashAttentionMetadata

parent 185d5e7c
......@@ -17,4 +17,4 @@ setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer==0.11.0
runai-model-streamer-s3==0.11.0
conch-triton-kernels==1.2.1
# conch-triton-kernels==1.2.1
......@@ -20,8 +20,8 @@ from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
from vllm.platforms import current_platform
from vllm.utils import is_pin_memory_available
from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
from vllm.v1.attention.backends.rocm_aiter_fa import (
AiterFlashAttentionMetadata)
# from vllm.v1.attention.backends.rocm_aiter_fa import (
# AiterFlashAttentionMetadata)
from vllm.v1.attention.backends.tree_attn import (TreeAttentionMetadata,
TreeAttentionMetadataBuilder)
from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata
......@@ -231,7 +231,7 @@ class EagleProposer:
if current_platform.is_rocm():
assert isinstance(
attn_metadata,
(TritonAttentionMetadata, AiterFlashAttentionMetadata,
(TritonAttentionMetadata, # AiterFlashAttentionMetadata,
FlashAttentionMetadata))
else:
# Currently, only FlashAttention supports multi-token eagle spec
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment