"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "81d8d7b73f063434d71c83533dc7dcf23e4b5ecc"
Unverified commit 53f7874a, authored by valarLip, committed by GitHub
Browse files

refine aiter_backend for mtp (#7279)


Co-authored-by: HAI <hixiao@gmail.com>
parent 61a46804
...@@ -1722,6 +1722,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin): ...@@ -1722,6 +1722,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
or attention_backend_str == "cutlass_mla" or attention_backend_str == "cutlass_mla"
or attention_backend_str == "ascend" or attention_backend_str == "ascend"
or attention_backend_str == "trtllm_mha" or attention_backend_str == "trtllm_mha"
or attention_backend_str == "aiter"
or global_server_args_dict["enable_two_batch_overlap"] or global_server_args_dict["enable_two_batch_overlap"]
): ):
seq_lens_cpu = ( seq_lens_cpu = (
......
...@@ -226,6 +226,22 @@ class EAGLEWorker(TpModelWorker): ...@@ -226,6 +226,22 @@ class EAGLEWorker(TpModelWorker):
self.draft_model_runner, self.draft_model_runner,
skip_prefill=False, skip_prefill=False,
) )
elif self.server_args.attention_backend == "aiter":
from sglang.srt.layers.attention.aiter_backend import (
AiterAttnBackend,
AiterMultiStepDraftBackend,
)
self.draft_attn_backend = AiterMultiStepDraftBackend(
self.draft_model_runner,
self.topk,
self.speculative_num_steps,
)
self.draft_extend_attn_backend = AiterAttnBackend(
self.draft_model_runner,
skip_prefill=False,
)
self.has_prefill_wrapper_verify = False
elif self.server_args.attention_backend == "fa3": elif self.server_args.attention_backend == "fa3":
from sglang.srt.layers.attention.flashattention_backend import ( from sglang.srt.layers.attention.flashattention_backend import (
FlashAttentionBackend, FlashAttentionBackend,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment