Unverified commit 8221f9ae, authored by Liangsheng Yin and committed by GitHub
Browse files

Tiny cleanup of some unused EAGLE code (#11660)

parent ab9187a2
...@@ -96,7 +96,6 @@ class DraftBackendFactory: ...@@ -96,7 +96,6 @@ class DraftBackendFactory:
FlashInferMultiStepDraftBackend, FlashInferMultiStepDraftBackend,
) )
self.has_prefill_wrapper_verify = True
return FlashInferMultiStepDraftBackend( return FlashInferMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps self.draft_model_runner, self.topk, self.speculative_num_steps
) )
...@@ -105,7 +104,6 @@ class DraftBackendFactory: ...@@ -105,7 +104,6 @@ class DraftBackendFactory:
FlashInferMLAMultiStepDraftBackend, FlashInferMLAMultiStepDraftBackend,
) )
self.has_prefill_wrapper_verify = True
return FlashInferMLAMultiStepDraftBackend( return FlashInferMLAMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps self.draft_model_runner, self.topk, self.speculative_num_steps
) )
...@@ -149,7 +147,6 @@ class DraftBackendFactory: ...@@ -149,7 +147,6 @@ class DraftBackendFactory:
TRTLLMHAAttnMultiStepDraftBackend, TRTLLMHAAttnMultiStepDraftBackend,
) )
self.has_prefill_wrapper_verify = True
return TRTLLMHAAttnMultiStepDraftBackend( return TRTLLMHAAttnMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps self.draft_model_runner, self.topk, self.speculative_num_steps
) )
...@@ -164,7 +161,6 @@ class DraftBackendFactory: ...@@ -164,7 +161,6 @@ class DraftBackendFactory:
TRTLLMMLAMultiStepDraftBackend, TRTLLMMLAMultiStepDraftBackend,
) )
self.has_prefill_wrapper_verify = True
return TRTLLMMLAMultiStepDraftBackend( return TRTLLMMLAMultiStepDraftBackend(
self.draft_model_runner, self.topk, self.speculative_num_steps self.draft_model_runner, self.topk, self.speculative_num_steps
) )
......
...@@ -41,7 +41,6 @@ class EAGLEDraftCudaGraphRunner: ...@@ -41,7 +41,6 @@ class EAGLEDraftCudaGraphRunner:
# Parse args # Parse args
self.eagle_worker = eagle_worker self.eagle_worker = eagle_worker
self.model_runner = model_runner = eagle_worker.model_runner self.model_runner = model_runner = eagle_worker.model_runner
self.model_runner: EAGLEWorker
self.graphs = {} self.graphs = {}
self.output_buffers = {} self.output_buffers = {}
self.enable_torch_compile = model_runner.server_args.enable_torch_compile self.enable_torch_compile = model_runner.server_args.enable_torch_compile
......
...@@ -192,10 +192,6 @@ class EAGLEWorker(TpModelWorker): ...@@ -192,10 +192,6 @@ class EAGLEWorker(TpModelWorker):
def init_attention_backend(self): def init_attention_backend(self):
# Create multi-step attn backends and cuda graph runners # Create multi-step attn backends and cuda graph runners
self.has_prefill_wrapper_verify = False
self.draft_extend_attn_backend = None
draft_backend_factory = DraftBackendFactory( draft_backend_factory = DraftBackendFactory(
self.server_args, self.server_args,
self.draft_model_runner, self.draft_model_runner,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment