Update README.md

b30d394f · jerrrrry · eac68470 · b30d394f
Commit b30d394f authored May 07, 2025 by jerrrrry
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 38 deletions

README.md README.md +3 -38

No files found.
--- a/README.md
+++ b/README.md
@@ -36,53 +36,18 @@ bs_in_out,elapsed_time,Throughput,total_tokens,output_tokens,ttft_mean,ttft_medi
 benchmark_servein_0.7.2.py
 backend_request_func.py
 使用此方式可以避免server推理时实际生成长度和指定长度不一致的问题
-bash
-#使用提供的脚本进行测试

+
+<pre>
 #启动server
-</pre>
 vllm serve $MODEL_PATH  --trust-remote-code   --dtype $dtype --max-model-len $max_len -tp $tp  --gpu-memory-utilization 0.97
-</pre>
-
-

 #发送请求
 #--distributed-executor-backend ray等其他参数根据实际情况添加
-</pre>
-方式与平常一样，只是需要加上--ignore-eos
+
 python  benchmark_servein_0.7.2.py --backend vllm --ignore-eos  --dataset-name random --random-input-len  $input_len --random-output-len  $output_len --model $MODEL_PATH  --num-prompts $num_prompts --endpoint /v1/completions
 </pre>
-prof
-offline_prof
-hipprof
-prof.py
-benchmark_throughput_0.7.2_hipprof.py
-bash
-#使用示例：
-黄色背景为额外添加的部分
-SGLANG_PROF_ROCTX=1 hipprof --trace-off python benchmark_throughput_0.7.2_hipprof.py --num-prompts 1  --input-len 2000  --output-len 1 --model  /models/Llama-2-7b-hf  --trust-remote-code  --enforce-eager --dtype float16 > 7b-prefill-2000-test.log 2>&1 
-torchprof
-benchmark_throughput_0.7.2_torchprof.py
-bash
-#启动方式与平常使用一致
-benchmark_throughput_0.7.2_torchprof.py --num-prompts 1  --input-len 2000  --output-len 1 --model  /models/Llama-2-7b-hf  --trust-remote-code  --enforce-eager --dtype float16 > 7b-prefill-2000-test.log 2>&1
-
-会打印prof信息，保存的json文件名为:
-{args.num_prompts}-{args.input_len}-{args.output_len}-{args.tensor_parallel_size}_dcu.json

-server_prof
-worker.py
-bash
-替换/usr/local/lib/python3.10/site-packages/vllm/worker/worker.py
-
-#启动服务
-local_path为保存json文件的绝对路径
-export VLLM_TORCH_PROFILER_DIR=$local_path 
-vllm serve $MODEL_PATH  --trust-remote-code   --dtype $dtype --max-model-len $max_len -tp $tp  --gpu-memory-utilization 0.97
-
-#发送请求
-#--distributed-executor-backend ray等其他参数根据实际情况添加
-python  benchmark_servein_0.7.2.py --backend vllm --ignore-eos  --profile --dataset-name random --random-input-len  $input_len --random-output-len  $output_len --model $MODEL_PATH  --num-prompts $num_prompts --endpoint /v1/completions

 0.6.2
 Offline推理