Unverified commit 2518230d, authored by billishyahao, committed by GitHub
Browse files

[MISC] Fix misleading batch_size_capture_list when cuda_graph_sizes < 4 (#25829)


Signed-off-by: billishyahao <bill.he@amd.com>
Co-authored-by: Luka Govedic <ProExpertProg@users.noreply.github.com>
parent a332b845
...@@ -580,9 +580,12 @@ class VllmConfig: ...@@ -580,9 +580,12 @@ class VllmConfig:
not self.model_config.enforce_eager: not self.model_config.enforce_eager:
cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
if len(cuda_graph_sizes) == 1: if len(cuda_graph_sizes) == 1:
batch_size_capture_list = [1, 2, 4] + [ max_graph_size = cuda_graph_sizes[0]
i for i in range(8, cuda_graph_sizes[0] + 1, 8) assert max_graph_size >= 1, "Maximum cudagraph size should be" \
] " greater than or equal to 1."
batch_size_capture_list = [
i for i in [1, 2, 4] if i <= max_graph_size
] + list(range(8, max_graph_size + 1, 8))
elif len(cuda_graph_sizes) > 1: elif len(cuda_graph_sizes) > 1:
batch_size_capture_list = sorted(cuda_graph_sizes) batch_size_capture_list = sorted(cuda_graph_sizes)
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment