#!/usr/bin/env bash
#
# Sweeps benchmark_serving.py over a grid of (prompt length, completion length)
# pairs and request counts, keeps the raw output of every run under ./log/,
# and appends one summary row per run to a CSV file in the current directory.
#
# The benchmark client is pointed at port 20011 with the "openai" backend;
# presumably a matching inference server must already be listening there --
# this script does not start one.
#
# `set -e` is deliberately NOT enabled: a failed run or a metric missing from
# the log must not abort the remainder of the sweep.
set -u

model_path="/module3/DeepSeek-R1-0528-W4A8-V2"
model_name=${model_path##*/}   # basename, used in log and CSV file names
tp=8
data_type="W4A8"
port=20011

log_dir="log"
csv_path="${model_name}_tp${tp}-nopc.csv"

# "<prompt_tokens> <completion_tokens>" combinations to benchmark.
pairs=(
  "512 512"
  "1024 512"
  "2048 512"
  "4096 512"
  "8192 512"
  "16384 512"
)

# Values passed to --num-prompts (recorded as "batch" in the CSV).
batches=(1 2 4 8 16 32 64 128)

#######################################
# Print one whitespace-separated field of the first log line matching a regex.
# Arguments:
#   $1 - log file to scan
#   $2 - awk regex selecting the line (e.g. "^Mean TTFT")
#   $3 - 1-based index of the field to print
# Outputs:
#   The field on stdout; nothing when no line matches.
#######################################
extract_metric() {
  awk -v pat="$2" -v col="$3" '$0 ~ pat { print $col; exit }' "$1"
}

mkdir -p "$log_dir"

# Start a fresh CSV; the column order must match the row written below.
printf '%s\n' "tp,data_type,batch,prompt_tokens,completion_tokens,TOTAL_THROUGHPUT(toks/s),generate_throughput(toks/s),TTFT(ms),TPOT(ms),ITL(ms),P99_TTFT(ms),P99_TPOT(ms),P99_ITL(ms)" > "$csv_path"

for pair in "${pairs[@]}"; do
  prompt_tokens=${pair%% *}
  completion_tokens=${pair#* }

  for batch in "${batches[@]}"; do
    echo "data_type: $data_type,batch: $batch, prompt_tokens: $prompt_tokens, completion_tokens: $completion_tokens, tp: ${tp}"

    log_path="${log_dir}/vllm_${model_name}_batch_${batch}_prompt_tokens_${prompt_tokens}_completion_tokens_${completion_tokens}_tp_${tp}.log"

    # Built as an array so every argument reaches python as exactly one word.
    bench_cmd=(
      python benchmark_serving.py
      --backend openai
      --port "$port"
      --model "$model_path"
      --trust-remote-code
      --dataset-name random
      --ignore-eos
      --random-input-len "$prompt_tokens"
      --random-output-len "$completion_tokens"
      --num-prompts "$batch"
    )

    # tee both shows the output live and stores it for metric extraction.
    "${bench_cmd[@]}" 2>&1 | tee "$log_path"
    bench_status=${PIPESTATUS[0]}   # status of the benchmark itself, not of tee
    if (( bench_status != 0 )); then
      printf 'warning: benchmark exited with status %s (see %s); metrics may be empty\n' \
        "$bench_status" "$log_path" >&2
    fi

    # Metrics are scraped from the summary lines the benchmark prints.
    gen_throughput=$(extract_metric "$log_path" "^Output token" 5)
    total_throughput=$(extract_metric "$log_path" "^Total Token" 5)
    ttft=$(extract_metric "$log_path" "^Mean TTFT" 4)
    tpot=$(extract_metric "$log_path" "^Mean TPOT" 4)
    itl=$(extract_metric "$log_path" "^Mean ITL" 4)
    p99_ttft=$(extract_metric "$log_path" "^P99 TTFT" 4)
    p99_tpot=$(extract_metric "$log_path" "^P99 TPOT" 4)
    p99_itl=$(extract_metric "$log_path" "^P99 ITL" 4)

    row=(
      "$tp" "$data_type" "$batch" "$prompt_tokens" "$completion_tokens"
      "$total_throughput" "$gen_throughput"
      "$ttft" "$tpot" "$itl"
      "$p99_ttft" "$p99_tpot" "$p99_itl"
    )
    # Join with commas in a subshell so the IFS change does not leak.
    (IFS=,; printf '%s\n' "${row[*]}") >> "$csv_path"
  done
done