models-to-test.cfg 5.26 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
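# Format: one test configuration per line, fields separated by semicolons (;):
# model_name;model_path;tp;batch;prompt_tokens;completion_tokens;dtype;max_model_len;gpu_memory_utilization
# model_path is the path inside the Docker container, not the path on the host.
# Multiple values within a field (e.g. batch, prompt_tokens, completion_tokens) are separated by commas.
# NOTE(review): prompt_tokens and completion_tokens have the same number of values in every entry,
# so they appear to be paired by position - confirm against the benchmark script.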

Meta-Llama-3-70B;/workspace/llms/Meta-Llama-3-70B;8;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95

Meta-Llama-3-70B;/workspace/llms/Meta-Llama-3-70B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95

Meta-Llama-3-8B;/workspace/llms/Meta-Llama-3-8B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;8192;0.95



Qwen2.5-72B;/workspace/llms/qwen2.5/Qwen2.5-72B-Instruct;8;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-72B;/workspace/llms/qwen2.5/Qwen2.5-72B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-32B;/workspace/llms/qwen2.5/Qwen2.5-32B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-32B;/workspace/llms/qwen2.5/Qwen2.5-32B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-14B;/workspace/llms/qwen2.5/Qwen2.5-14B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-14B;/workspace/llms/qwen2.5/Qwen2.5-14B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-7B;/workspace/llms/qwen2.5/Qwen2.5-7B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95



QwQ-32B;/workspace/llms/QwQ-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95



Qwen2.5-VL-32B;/workspace/llms/qwen2.5/Qwen2.5-VL-32B-Instruct;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-VL-32B;/workspace/llms/qwen2.5/Qwen2.5-VL-32B-Instruct;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen2.5-VL-7B;/workspace/llms/qwen2.5/Qwen2.5-VL-7B-Instruct;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95


Qwen3-235B-A22B;/workspace/llms/qwen3/Qwen3-235B-A22B;8;1,2,4,6,8,10,12,14,16,18,20;512,512,1024,2048,4096;512,1024,1024,1024,1024;float16;20000;0.95

Qwen3-30B-A3B;/workspace/llms/qwen3/Qwen3-30B-A3B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen3-30B-A3B;/workspace/llms/qwen3/Qwen3-30B-A3B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen3-32B;/workspace/llms/qwen3/Qwen3-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen3-32B;/workspace/llms/qwen3/Qwen3-32B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen3-14B;/workspace/llms/qwen3/Qwen3-14B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

Qwen3-14B;/workspace/llms/qwen3/Qwen3-14B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95




DeepSeek-R1-Distill-Llama-70B;/workspace/llms/DeepSeek-R1-Distill-Llama-70B;8;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Llama-70B;/workspace/llms/DeepSeek-R1-Distill-Llama-70B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-32B;/workspace/llms/DeepSeek-R1-Distill-Qwen-32B;4;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-32B;/workspace/llms/DeepSeek-R1-Distill-Qwen-32B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-14B;/workspace/llms/DeepSeek-R1-Distill-Qwen-14B;2;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-14B;/workspace/llms/DeepSeek-R1-Distill-Qwen-14B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Llama-8B;/workspace/llms/DeepSeek-R1-Distill-Llama-8B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-7B;/workspace/llms/DeepSeek-R1-Distill-Qwen-7B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95

DeepSeek-R1-Distill-Qwen-1.5B;/workspace/llms/DeepSeek-R1-Distill-Qwen-1.5B;1;1,2,4,8,16,32,64;512,512,1024,2048,4096,8192,16384,20480;512,1024,1024,1024,1024,1024,1024,1024;float16;32768;0.95