#!/usr/bin/env bash
#
# Benchmark driver: runs `vllm bench serve` once per batch size and stores the
# results under "$RESULT_DIR/<role>".
#
# Roles:
#   prefill - output length is forced to DEFAULT_OUTPUT_LEN_PREFILL
#   decode  - output length defaults to DEFAULT_OUTPUT_LEN_DECODE
#   both    - re-invokes this script for prefill, then for decode
#
# Exit status: 0 when every benchmark succeeded, 1 on invalid arguments or
# when at least one benchmark run failed.
set -u

readonly -a DEFAULT_BATCH_SIZES=(1 8 16 32 64 128)
readonly MODEL_PATH="/module/step3.5-fp8/"
readonly SERVED_MODEL_NAME="/module/step3.5-fp8/"
readonly DATASET_NAME="random"
readonly DEFAULT_OUTPUT_LEN_DECODE=4096
readonly DEFAULT_OUTPUT_LEN_PREFILL=1
readonly DEFAULT_ROLE="decode"
# Value handed to `--ready-check-timeout`.
readonly READY_CHECK_TIMEOUT=3
readonly RESULT_DIR="benchmark_result"
# Value handed to `--random-input-len`.
readonly RANDOM_INPUT_LEN=65536
# Value handed to `--port`.
readonly SERVER_PORT=8018

#######################################
# Prints the usage text to stdout.
#######################################
print_usage() {
  cat <<'USAGE'
Usage:
  ./scripts/step3p5_benchmark_test.sh
  ./scripts/step3p5_benchmark_test.sh 1,8,16,32
  ./scripts/step3p5_benchmark_test.sh --role prefill
  ./scripts/step3p5_benchmark_test.sh --role decode
  ./scripts/step3p5_benchmark_test.sh --role both
  ./scripts/step3p5_benchmark_test.sh --role prefill --output-len 1

Description:
  - No argument: use default batch sizes, role=decode, output-len=4096
  - Optional positional argument: batch size list (comma or space separated)
  - Optional flag: --role <prefill|decode|both>
  - Optional flag: --output-len <N> (must be positive integer)
  - role=both 时串行执行 prefill 再 decode
  - Result files are saved under:
      benchmark_result/prefill (when role=prefill)
      benchmark_result/decode  (when role=decode)
USAGE
}

#######################################
# Writes an "[ERROR] ..." line to stderr.
# Arguments: message words
#######################################
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}

#######################################
# Reports an error and exits with status 1.
# Arguments: message words
#######################################
die() {
  log_error "$@"
  exit 1
}

#######################################
# Reports an error, prints the usage text to stderr and exits with status 1.
# Arguments: message words
#######################################
die_usage() {
  log_error "$@"
  print_usage >&2
  exit 1
}

#######################################
# Parses a comma- or space-separated batch size list.
# Globals:   DEFAULT_BATCH_SIZES (read), BATCH_SIZES (written)
# Arguments: $1 - raw list; empty or missing selects the defaults
# Exits:     1 when the list is empty or an entry is not a positive integer
#######################################
parse_batch_sizes() {
  local raw_input="${1:-}"

  if [[ -z "$raw_input" ]]; then
    BATCH_SIZES=("${DEFAULT_BATCH_SIZES[@]}")
    return
  fi

  # Commas become spaces so one `read -a` handles both separators.
  raw_input="${raw_input//,/ }"
  read -r -a BATCH_SIZES <<< "$raw_input"

  if (( ${#BATCH_SIZES[@]} == 0 )); then
    die_usage "batch size 列表为空。"
  fi

  local batch_size
  for batch_size in "${BATCH_SIZES[@]}"; do
    if ! [[ "$batch_size" =~ ^[1-9][0-9]*$ ]]; then
      die "非法 batch size: $batch_size（必须是正整数）"
    fi
  done
}

#######################################
# Validates the role name.
# Globals:   DEFAULT_ROLE (read), ROLE (written)
# Arguments: $1 - prefill, decode or both; defaults to DEFAULT_ROLE
# Exits:     1 on any other value
#######################################
parse_role() {
  local role_input="${1:-$DEFAULT_ROLE}"

  case "$role_input" in
    prefill | decode | both)
      ROLE="$role_input"
      ;;
    *)
      die_usage "非法 role: $role_input（必须是 prefill、decode 或 both）"
      ;;
  esac
}

#######################################
# Validates the output length.
# Globals:   RANDOM_OUTPUT_LEN (written)
# Arguments: $1 - output length, must be a positive integer
# Exits:     1 when the value is not a positive integer
#######################################
parse_output_len() {
  local output_len_input="$1"

  if ! [[ "$output_len_input" =~ ^[1-9][0-9]*$ ]]; then
    die_usage "非法 output-len: $output_len_input（必须是正整数）"
  fi
  RANDOM_OUTPUT_LEN="$output_len_input"
}

#######################################
# Derives the prompt count for a batch size: 1.5x the batch size (integer
# division), clamped to [16, 384], but never below the batch size itself.
# Arguments: $1 - batch size
# Outputs:   the prompt count on stdout
#######################################
calc_num_prompts() {
  local batch_size="$1"
  local num_prompts=$((batch_size + batch_size / 2))

  if (( num_prompts < 16 )); then
    num_prompts=16
  fi
  if (( num_prompts > 384 )); then
    num_prompts=384
  fi
  # Batch sizes above the upper clamp win over it.
  if (( num_prompts < batch_size )); then
    num_prompts=$batch_size
  fi
  echo "$num_prompts"
}

#######################################
# Runs one `vllm bench serve` invocation.
# Globals:   MODEL_PATH, SERVED_MODEL_NAME, DATASET_NAME, RANDOM_INPUT_LEN,
#            RANDOM_OUTPUT_LEN, READY_CHECK_TIMEOUT, RESULT_SUBDIR,
#            SERVER_PORT (all read)
# Arguments: $1 - batch size (passed as --max-concurrency)
#            $2 - number of prompts
# Returns:   the exit status of `vllm`
#######################################
run_one_benchmark() {
  local batch_size="$1"
  local num_prompts="$2"

  vllm bench serve \
    --backend vllm \
    --model "$MODEL_PATH" \
    --served-model-name "$SERVED_MODEL_NAME" \
    --dataset-name "$DATASET_NAME" \
    --random-input-len "$RANDOM_INPUT_LEN" \
    --random-output-len "$RANDOM_OUTPUT_LEN" \
    --num-prompts "$num_prompts" \
    --temperature 0 \
    --max-concurrency "$batch_size" \
    --ready-check-timeout "$READY_CHECK_TIMEOUT" \
    --result-dir "$RESULT_SUBDIR" \
    --port "$SERVER_PORT" \
    --save-result
}

#######################################
# Runs the prefill pass and then the decode pass by re-invoking this script.
# The decode pass still runs when the prefill pass fails.
# Arguments: $1 - raw batch size list (may be empty)
#            $2 - output length for the decode pass (may be empty)
# Returns:   0 when both passes succeeded, 1 otherwise
#######################################
run_both_roles() {
  local batch_arg="$1"
  local output_len_arg="$2"

  # Going through "$BASH" keeps this working when the script has no execute
  # bit (e.g. it was started as `bash script.sh`).
  local -a prefill_cmd=("$BASH" "${BASH_SOURCE[0]}")
  local -a decode_cmd=("$BASH" "${BASH_SOURCE[0]}")

  if [[ -n "$batch_arg" ]]; then
    prefill_cmd+=("$batch_arg")
    decode_cmd+=("$batch_arg")
  fi
  prefill_cmd+=("--role" "prefill")
  decode_cmd+=("--role" "decode")
  # Only decode receives the output length; prefill always uses its default.
  if [[ -n "$output_len_arg" ]]; then
    decode_cmd+=("--output-len" "$output_len_arg")
  fi

  local failed=0
  echo "[INFO] role=both: 将串行执行 prefill 和 decode"

  echo "[INFO] step1: ${prefill_cmd[*]}"
  if ! "${prefill_cmd[@]}"; then
    echo "[WARN] role=both: prefill 执行失败，继续执行 decode。"
    failed=1
  fi

  echo "[INFO] step2: ${decode_cmd[*]}"
  if ! "${decode_cmd[@]}"; then
    echo "[WARN] role=both: decode 执行失败。"
    failed=1
  fi

  if (( failed )); then
    echo "[WARN] role=both 执行结束，但存在失败的步骤。"
    return 1
  fi
  echo "[INFO] role=both 执行完成。"
}

#######################################
# Entry point: parses the command line and runs the benchmarks.
# Globals:   BATCH_SIZES, ROLE, RANDOM_OUTPUT_LEN, RESULT_SUBDIR (written)
# Arguments: the script's command-line arguments
# Exits:     1 on invalid arguments or when any benchmark run failed
#######################################
main() {
  local batch_arg=""
  local role_arg="$DEFAULT_ROLE"
  local output_len_arg=""

  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        print_usage
        exit 0
        ;;
      --role | -r)
        if [[ -z "${2:-}" ]]; then
          die_usage "--role 缺少参数。"
        fi
        role_arg="$2"
        shift 2
        ;;
      --output-len | -o)
        if [[ -z "${2:-}" ]]; then
          die_usage "--output-len 缺少参数。"
        fi
        output_len_arg="$2"
        shift 2
        ;;
      -*)
        # Covers unknown short options too; a valid batch size list can never
        # start with '-'.
        die_usage "未知参数: $1"
        ;;
      *)
        if [[ -n "$batch_arg" ]]; then
          die_usage "仅支持一个 batch size 列表参数。"
        fi
        batch_arg="$1"
        shift
        ;;
    esac
  done

  parse_batch_sizes "$batch_arg"
  parse_role "$role_arg"

  if [[ "$ROLE" == "both" ]]; then
    # Validate up front so a bad value is rejected before the prefill pass
    # runs rather than after it.
    if [[ -n "$output_len_arg" ]]; then
      parse_output_len "$output_len_arg"
    fi
    run_both_roles "$batch_arg" "$output_len_arg" || exit 1
    return 0
  fi

  if [[ "$ROLE" == "prefill" ]]; then
    if [[ -n "$output_len_arg" && "$output_len_arg" != "$DEFAULT_OUTPUT_LEN_PREFILL" ]]; then
      echo "[WARN] role=prefill 时 output-len 必须为 1，已自动覆盖为 1。"
    fi
    output_len_arg="$DEFAULT_OUTPUT_LEN_PREFILL"
  elif [[ -z "$output_len_arg" ]]; then
    output_len_arg="$DEFAULT_OUTPUT_LEN_DECODE"
  fi
  parse_output_len "$output_len_arg"

  RESULT_SUBDIR="$RESULT_DIR/$ROLE"
  if ! mkdir -p -- "$RESULT_SUBDIR"; then
    die "无法创建结果目录: $RESULT_SUBDIR"
  fi

  echo "[INFO] 将执行 ${#BATCH_SIZES[@]} 组 benchmark"
  echo "[INFO] role: $ROLE"
  echo "[INFO] random-output-len: $RANDOM_OUTPUT_LEN"
  echo "[INFO] batch size 列表: ${BATCH_SIZES[*]}"
  echo "[INFO] result_dir: $RESULT_SUBDIR"

  local batch_size
  local num_prompts
  local failed_count=0

  for batch_size in "${BATCH_SIZES[@]}"; do
    num_prompts="$(calc_num_prompts "$batch_size")"
    echo ""
    echo "[INFO] 开始测试: role=$ROLE, batch_size=$batch_size, max_concurrency=$batch_size, num_prompts=$num_prompts, random_output_len=$RANDOM_OUTPUT_LEN"

    # A failed run is counted and the remaining batch sizes still run.
    if run_one_benchmark "$batch_size" "$num_prompts"; then
      echo "[INFO] batch_size=$batch_size 执行完成。"
    else
      echo "[WARN] batch_size=$batch_size 执行失败，继续下一个。"
      failed_count=$((failed_count + 1))
    fi
  done

  echo ""
  if (( failed_count > 0 )); then
    echo "[WARN] 全部执行结束，但有 $failed_count 组失败。"
    exit 1
  fi
  echo "[INFO] 全部 benchmark 执行完成。"
}

main "$@"