Commit 155fe0d2 authored by zhuwenwen, committed by jujl1
Browse files

use schedule_split_pd (dpsk mtp + fullgraph)

parent 22a14b84
@@ -153,6 +153,10 @@ class Scheduler(SchedulerInterface):
self.use_eagle = True
self.num_lookahead_tokens = self.num_spec_tokens
self.compilation_config = vllm_config.compilation_config
self.full_cuda_graph = self.compilation_config.full_cuda_graph
self.use_mla = vllm_config.model_config.use_mla
# Create the KV cache manager.
self.kv_cache_manager = KVCacheManager(
kv_cache_config=kv_cache_config,
@@ -1024,7 +1028,7 @@ class Scheduler(SchedulerInterface):
return scheduler_output
# Scheduling entry point: dispatches to either `schedule_split_pd()` or
# `schedule_default()` and returns whatever the chosen method returns.
#
# NOTE(review): this listing comes from a diff hunk whose +/- markers and
# indentation were lost. The two consecutive `if` lines below appear to be
# the removed and the added version of the same condition -- confirm against
# the actual commit before treating this as runnable code.
def schedule(self) -> SchedulerOutput:
# Presumably the pre-change condition (removed line): only the
# VLLM_USE_PD_SPLIT environment flag selected the split-PD path.
if envs.VLLM_USE_PD_SPLIT:
# Presumably the post-change condition (added line): the split-PD path is
# also taken when `full_cuda_graph` and `use_mla` are both truthy and
# `num_spec_tokens` is greater than zero, regardless of the env flag.
if envs.VLLM_USE_PD_SPLIT or (self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0) :
return self.schedule_split_pd()
else:
# Fallback: neither the env flag nor the attribute combination applies.
return self.schedule_default()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment