#!/bin/bash
# test.sh
#
# Runs benchmark_serving.py once for every combination of batch size and
# prompt/completion length pair, then appends the extracted throughput and
# latency metrics to a CSV file.
#
# Parameters are read from the environment:
#   MODEL_NAME    label used in the result and log file names
#   MODEL_PATH    value passed to --model
#   TP            recorded in the CSV and used in file names
#   DATA_TYPE     recorded in the CSV
#   BATCH_LIST    space-separated list of --num-prompts values
#   PROMPT_PAIRS  comma-separated "prompt_len completion_len" pairs
#   PORT          value passed to --port
#
# The interpreter line must be the very first line of the file, otherwise it
# is treated as an ordinary comment and ignored when the script is executed.

# Make GPUs 0-7 visible to the HIP runtime.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Pin the NCCL channel count to exactly 16 (min == max).
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16

# Feature toggles; presumably consumed by the vLLM build under test
# (not verifiable from this script) -- TODO confirm.
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export VLLM_PCIE_USE_CUSTOM_ALLREDUCE=1
export VLLM_USE_TRITON_PREFIX_FLASH_ATTN=1

# Rank i is mapped to NUMA node i; presumably honoured by the vLLM build
# under test when VLLM_NUMA_BIND=1 -- TODO confirm.
export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=2
export VLLM_RANK3_NUMA=3
export VLLM_RANK4_NUMA=4
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=6
export VLLM_RANK7_NUMA=7


# Read the run parameters from the environment.
# Abort immediately with a clear message when a required one is missing,
# instead of failing later inside the benchmark or silently running zero
# iterations because a list is empty.
: "${MODEL_NAME:?MODEL_NAME must be set}"
: "${MODEL_PATH:?MODEL_PATH must be set}"
: "${TP:?TP must be set}"
: "${BATCH_LIST:?BATCH_LIST must be set (space-separated batch sizes)}"
: "${PROMPT_PAIRS:?PROMPT_PAIRS must be set (comma-separated pairs, e.g. 128 128,512 256)}"
: "${PORT:?PORT must be set}"

model_name=${MODEL_NAME}
model_path=${MODEL_PATH}
tp=${TP}
# DATA_TYPE is only written into the CSV rows, so it may be left empty.
data_type=${DATA_TYPE:-}
batch_list=${BATCH_LIST}
prompt_pairs=${PROMPT_PAIRS}
port=${PORT}

# Build the result file name and start the file with the CSV header.
# The directory is created first: without it the redirection below fails
# and every later append to the file fails as well.
result_file="/workspace/test/inference_outputs/results/${model_name}_tp${tp}.csv"
if ! mkdir -p "$(dirname "$result_file")"; then
    echo "Cannot create result directory for $result_file" >&2
    exit 1
fi
if ! echo "tp,data_type,batch,prompt_tokens,completion_tokens,TOTAL_THROUGHPUT(toks/s),generate_throughput(toks/s),TTFT(ms),TPOT(ms),ITL(ms)" > "$result_file"; then
    echo "Cannot write result file $result_file" >&2
    exit 1
fi

#######################################
# Print one metric value taken from a benchmark log.
# Arguments:
#   $1 - awk regular expression selecting the log line
#   $2 - number of the whitespace-separated field to print
#   $3 - path of the log file
# Outputs:
#   The requested field of the FIRST matching line, or nothing when no line
#   matches. Stopping at the first match guarantees a single value, so a
#   repeated line in the log cannot inject a newline into the CSV row.
#######################################
extract_metric() {
    awk -v pattern="$1" -v column="$2" \
        '$0 ~ pattern { print $column; exit }' "$3"
}

# Split the space-separated batch sizes and the comma-separated
# "prompt completion" pairs into arrays.
IFS=' ' read -ra batches <<< "$batch_list"
IFS=',' read -ra pairs <<< "$prompt_pairs"

# Run the benchmark once per (batch, pair) combination.
for batch in "${batches[@]}"; do
    for pair in "${pairs[@]}"; do
        IFS=' ' read -r prompt_tokens completion_tokens <<< "$pair"
        # An empty or one-number pair would be handed to the benchmark as an
        # empty argument; skip it and say so instead.
        if [[ -z "$prompt_tokens" || -z "$completion_tokens" ]]; then
            printf 'Skipping malformed prompt pair: "%s"\n' "$pair" >&2
            continue
        fi

        log_file="/workspace/test/inference_outputs/logs/models/${model_name}_${tp}/batch_${batch}_prompt_${prompt_tokens}_completion_${completion_tokens}.log"
        mkdir -p "$(dirname "$log_file")"
        echo "Running: batch=$batch, prompt=$prompt_tokens, completion=$completion_tokens"
        python benchmark_serving.py \
            --backend openai \
            --port "$port" \
            --model "$model_path" \
            --trust-remote-code \
            --dataset-name random \
            --ignore-eos \
            --random-input-len "$prompt_tokens" \
            --random-output-len "$completion_tokens" \
            --num-prompts "$batch" \
            2>&1 | tee "$log_file"
        # $? would report tee's status; PIPESTATUS[0] is the benchmark's own.
        bench_status=${PIPESTATUS[0]}
        if (( bench_status != 0 )); then
            printf 'Warning: benchmark_serving.py exited with status %s (batch=%s, prompt=%s, completion=%s); metrics may be missing\n' \
                "$bench_status" "$batch" "$prompt_tokens" "$completion_tokens" >&2
        fi

        # Extract the metrics from the log and append one CSV row.
        TOTAL_THROUGHPUT=$(extract_metric "^Total Token" 5 "$log_file")
        GEN_THROUGHPUT=$(extract_metric "^Output token" 5 "$log_file")
        TTFT=$(extract_metric "^Mean TTFT" 4 "$log_file")
        TPOT=$(extract_metric "^Mean TPOT" 4 "$log_file")
        ITL=$(extract_metric "^Mean ITL" 4 "$log_file")
        echo "$tp,$data_type,$batch,$prompt_tokens,$completion_tokens,$TOTAL_THROUGHPUT,$GEN_THROUGHPUT,$TTFT,$TPOT,$ITL" >> "$result_file"
    done
done