Unverified commit 8221f9ae, authored by Liangsheng Yin, committed by GitHub
Browse files

Tiny cleanup of some unused EAGLE code (#11660)

parent ab9187a2
......@@ -96,7 +96,6 @@ class DraftBackendFactory:
FlashInferMultiStepDraftBackend,
)
self.has_prefill_wrapper_verify = True
return FlashInferMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps
)
......@@ -105,7 +104,6 @@ class DraftBackendFactory:
FlashInferMLAMultiStepDraftBackend,
)
self.has_prefill_wrapper_verify = True
return FlashInferMLAMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps
)
......@@ -149,7 +147,6 @@ class DraftBackendFactory:
TRTLLMHAAttnMultiStepDraftBackend,
)
self.has_prefill_wrapper_verify = True
return TRTLLMHAAttnMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps
)
......@@ -164,7 +161,6 @@ class DraftBackendFactory:
TRTLLMMLAMultiStepDraftBackend,
)
self.has_prefill_wrapper_verify = True
return TRTLLMMLAMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps
)
......
......@@ -41,7 +41,6 @@ class EAGLEDraftCudaGraphRunner:
# Parse args
self.eagle_worker = eagle_worker
self.model_runner = model_runner = eagle_worker.model_runner
self.model_runner: EAGLEWorker
self.graphs = {}
self.output_buffers = {}
self.enable_torch_compile = model_runner.server_args.enable_torch_compile
......
......@@ -192,10 +192,6 @@ class EAGLEWorker(TpModelWorker):
def init_attention_backend(self):
# Create multi-step attn backends and cuda graph runners
self.has_prefill_wrapper_verify = False
self.draft_extend_attn_backend = None
draft_backend_factory = DraftBackendFactory(
self.server_args,
self.draft_model_runner,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment