Unverified commit b98188c8, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: profiling script missing tests when kv cache is tight (#2567)

parent 19f8eb00
......@@ -42,18 +42,25 @@ def profile_decode(
(max_context_length - osl) // interpolation_granularity,
):
max_concurrency = max_kv_tokens // (isl + osl)
if max_concurrency // interpolation_granularity == 0:
if max_concurrency == 0:
logger.warning(
f"max_kv_tokens {max_kv_tokens} is too small for"
f" isl {isl} + osl {osl}, skipping."
)
break
elif max_concurrency < interpolation_granularity:
logger.warning(
f"max_concurrency {max_concurrency} is too small for"
f" interpolation granularity {interpolation_granularity}."
f" max_kv_tokens {max_kv_tokens}, isl {isl}, osl {osl}"
)
break
sweep_num_request = range(
1,
max_concurrency,
max_concurrency // interpolation_granularity,
)
sweep_num_request = range(1, max_concurrency + 1)
else:
sweep_num_request = range(
1,
max_concurrency,
max_concurrency // interpolation_granularity,
)
for num_request in sweep_num_request:
genai_perf_artifact_dir = f"{work_dir}/gap_isl{isl}_osl{osl}_n{num_request}"
gap_result = benchmark_decode(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment