"deploy/vscode:/vscode.git/clone" did not exist on "bce060d2800b58308a39ea5cfeb9aa795dc82a76"
Unverified commit f2414f51, authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: reduce max ISL when benchmarking prefill (#4030)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 6afa679c
...@@ -31,6 +31,13 @@ def _profile_prefill_helper( ...@@ -31,6 +31,13 @@ def _profile_prefill_helper(
prefill_isl = [] prefill_isl = []
prefill_ttft = [] prefill_ttft = []
prefill_thpt_per_gpu = [] prefill_thpt_per_gpu = []
max_context_length -= 512 # leave some room for chat template and system prompt
if max_context_length <= 100:
error_message = (
f"max_context_length {max_context_length} is too small to profile prefill"
)
logger.error(error_message)
raise ValueError(error_message)
for isl in range( for isl in range(
100, 100,
max_context_length, max_context_length,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment