# Format:
# model_name;model_path;tp;batch;prompt_tokens;completion_tokens;dtype;max_model_len;gpu_memory_utilization
# Multiple values are separated by commas
DeepSeek-R1-Distill-Qwen-1.5B;/workspace/llms/DeepSeek-R1-Distill-Qwen-1.5B;1;1,2,4;128,512,1024;1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-7B;/workspace/llms/DeepSeek-R1-Distill-Qwen-7B;4;1,2;128,512;512,1024;bfloat16;4096;0.95
DeepSeek-R1-Distill-Llama-8B;/workspace/llms/DeepSeek-R1-Distill-Llama-8B;1;1,2,4,8;128,256,512,1024;256,512,1024,2048;float16;8192;0.95