# 格式说明:
# 模型名称;模型路径;tp;batch;prompt_tokens;completion_tokens;dtype;max_model_len;gpu_memory_utilization
# 模型路径为 docker 容器内的路径
# 多个值用逗号分隔
Meta-Llama-3-70B;/workspace/llms/Meta-Llama-3-70B;8;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95
Meta-Llama-3-70B;/workspace/llms/Meta-Llama-3-70B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95
Meta-Llama-3-8B;/workspace/llms/Meta-Llama-3-8B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95
Qwen2.5-72B;/workspace/llms/qwen2.5/Qwen2.5-72B-Instruct;8;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-72B;/workspace/llms/qwen2.5/Qwen2.5-72B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-32B;/workspace/llms/qwen2.5/Qwen2.5-32B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-32B;/workspace/llms/qwen2.5/Qwen2.5-32B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-14B;/workspace/llms/qwen2.5/Qwen2.5-14B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-14B;/workspace/llms/qwen2.5/Qwen2.5-14B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-7B;/workspace/llms/qwen2.5/Qwen2.5-7B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
QwQ-32B;/workspace/llms/QwQ-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-VL-32B;/workspace/llms/qwen2.5/Qwen2.5-VL-32B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-VL-32B;/workspace/llms/qwen2.5/Qwen2.5-VL-32B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen2.5-VL-7B;/workspace/llms/qwen2.5/Qwen2.5-VL-7B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-235B-A22B;/workspace/llms/qwen3/Qwen3-235B-A22B;8;1,2,4,6,8,10,12,14,16,18,20;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;20000;0.95
Qwen3-30B-A3B;/workspace/llms/qwen3/Qwen3-30B-A3B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-30B-A3B;/workspace/llms/qwen3/Qwen3-30B-A3B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-32B;/workspace/llms/qwen3/Qwen3-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-32B;/workspace/llms/qwen3/Qwen3-32B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-14B;/workspace/llms/qwen3/Qwen3-14B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
Qwen3-14B;/workspace/llms/qwen3/Qwen3-14B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Llama-70B;/workspace/llms/DeepSeek-R1-Distill-Llama-70B;8;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Llama-70B;/workspace/llms/DeepSeek-R1-Distill-Llama-70B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-32B;/workspace/llms/DeepSeek-R1-Distill-Qwen-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-32B;/workspace/llms/DeepSeek-R1-Distill-Qwen-32B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-14B;/workspace/llms/DeepSeek-R1-Distill-Qwen-14B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-14B;/workspace/llms/DeepSeek-R1-Distill-Qwen-14B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Llama-8B;/workspace/llms/DeepSeek-R1-Distill-Llama-8B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-7B;/workspace/llms/DeepSeek-R1-Distill-Qwen-7B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95
DeepSeek-R1-Distill-Qwen-1.5B;/workspace/llms/DeepSeek-R1-Distill-Qwen-1.5B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95