Commit 155fe0d2 authored by zhuwenwen, committed by jujl1
Browse files

Use schedule_split_pd when full CUDA graph, MLA, and speculative tokens are all enabled (dpsk mtp + fullgraph)

parent 22a14b84
...@@ -152,6 +152,10 @@ class Scheduler(SchedulerInterface): ...@@ -152,6 +152,10 @@ class Scheduler(SchedulerInterface):
if speculative_config.use_eagle(): if speculative_config.use_eagle():
self.use_eagle = True self.use_eagle = True
self.num_lookahead_tokens = self.num_spec_tokens self.num_lookahead_tokens = self.num_spec_tokens
self.compilation_config = vllm_config.compilation_config
self.full_cuda_graph = self.compilation_config.full_cuda_graph
self.use_mla = vllm_config.model_config.use_mla
# Create the KV cache manager. # Create the KV cache manager.
self.kv_cache_manager = KVCacheManager( self.kv_cache_manager = KVCacheManager(
...@@ -1024,7 +1028,7 @@ class Scheduler(SchedulerInterface): ...@@ -1024,7 +1028,7 @@ class Scheduler(SchedulerInterface):
return scheduler_output return scheduler_output
def schedule(self) -> SchedulerOutput: def schedule(self) -> SchedulerOutput:
if envs.VLLM_USE_PD_SPLIT: if envs.VLLM_USE_PD_SPLIT or (self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0) :
return self.schedule_split_pd() return self.schedule_split_pd()
else: else:
return self.schedule_default() return self.schedule_default()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment