Unverified Commit 4f76a05f authored by ericehanley's avatar ericehanley Committed by GitHub
Browse files

[BugFix] Update python to python3 calls for image; fix prefix & input calculations. (#21391)


Signed-off-by: default avatarEric Hanley <ericehanley@google.com>
Co-authored-by: default avatargemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent f154bb9f
...@@ -126,11 +126,12 @@ run_benchmark() { ...@@ -126,11 +126,12 @@ run_benchmark() {
# get a basic qps by using request-rate inf # get a basic qps by using request-rate inf
bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt" bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_inf.txt"
prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 )) prefix_len=$(( INPUT_LEN * MIN_CACHE_HIT_PCT / 100 ))
python benchmarks/benchmark_serving.py \ adjusted_input_len=$(( INPUT_LEN - prefix_len ))
python3 benchmarks/benchmark_serving.py \
--backend vllm \ --backend vllm \
--model $MODEL \ --model $MODEL \
--dataset-name random \ --dataset-name random \
--random-input-len $INPUT_LEN \ --random-input-len $adjusted_input_len \
--random-output-len $OUTPUT_LEN \ --random-output-len $OUTPUT_LEN \
--ignore-eos \ --ignore-eos \
--disable-tqdm \ --disable-tqdm \
...@@ -159,11 +160,11 @@ run_benchmark() { ...@@ -159,11 +160,11 @@ run_benchmark() {
curl -X POST http://0.0.0.0:8004/reset_prefix_cache curl -X POST http://0.0.0.0:8004/reset_prefix_cache
sleep 5 sleep 5
bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt" bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt"
python benchmarks/benchmark_serving.py \ python3 benchmarks/benchmark_serving.py \
--backend vllm \ --backend vllm \
--model $MODEL \ --model $MODEL \
--dataset-name random \ --dataset-name random \
--random-input-len $INPUT_LEN \ --random-input-len $adjusted_input_len \
--random-output-len $OUTPUT_LEN \ --random-output-len $OUTPUT_LEN \
--ignore-eos \ --ignore-eos \
--disable-tqdm \ --disable-tqdm \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment