Commit 1d6ca12a authored by zhuwenwen
Browse files

update cuda_graph_sizes

parent 01ffd437
...@@ -3787,7 +3787,7 @@ class VllmConfig: ...@@ -3787,7 +3787,7 @@ class VllmConfig:
batch_size_capture_list = [] batch_size_capture_list = []
if self.model_config is not None and \ if self.model_config is not None and \
not self.model_config.enforce_eager: not self.model_config.enforce_eager:
if self.model_config.use_mla and self.compilation_config.full_cuda_graph and self.scheduler_config.max_num_seqs<=256: if self.model_config.use_mla and self.compilation_config.full_cuda_graph and self.scheduler_config.max_num_seqs<=512:
cuda_graph_sizes = [self.scheduler_config.max_num_seqs] cuda_graph_sizes = [self.scheduler_config.max_num_seqs]
else: else:
cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment