# config.yaml — model/autotest configuration for the QA test suite.
# Filesystem locations used by the autotest harness.
# NOTE(review): model_path uses a /mnt/bigdisk prefix while every other path
# uses /nvme — confirm this difference is intentional.
model_path: /mnt/bigdisk/qa_test_models
dst_path: /nvme/qa_test_models/autotest_model
log_path: /nvme/qa_test_models/autotest_model/log
# NOTE(review): "...dataset" looks like a truncated or garbled path segment —
# verify the real dataset directory name before relying on this value.
dataset_path: /nvme/qa_test_models/...dataset


# Tensor-parallel degree per model (number of GPUs each model is sharded
# across). Presumably models not listed here run with TP=1 — confirm the
# default in the consuming test harness.
tp_config:
    internlm-chat-20b: 2
    internlm2-chat-20b: 2
    Baichuan2-13B-Chat: 2
    Mixtral-8x7B-Instruct-v0.1: 2
    internlm2-20b: 2


# Models tested with the TurboMind backend. Entries are HuggingFace-style
# "org/repo" identifiers; the "-4bits" suffixed entries are pre-quantized
# variants of the corresponding base models.
turbomind_model:
    - meta-llama/Llama-2-7b-chat
    - internlm/internlm2-chat-1_8b
    - internlm/internlm-chat-7b
    - internlm/internlm-chat-20b
    - internlm/internlm2-chat-7b
    - internlm/internlm2-chat-20b
    - internlm/internlm2-chat-7b-4bits
    - internlm/internlm2-chat-20b-4bits
    - Qwen/Qwen-7B-Chat
    - Qwen/Qwen-14B-Chat
    - lmdeploy/llama2-chat-7b-w4
    - baichuan-inc/Baichuan2-7B-Chat
    - 01-ai/Yi-6B-Chat
    - internlm/internlm2-1_8b
    - internlm/internlm2-20b
    - codellama/CodeLlama-7b-Instruct-hf


# Models tested with the PyTorch backend (HuggingFace-style "org/repo" ids).
pytorch_model:
    - meta-llama/Llama-2-7b-chat
    - internlm/internlm-chat-7b
    - internlm/internlm-chat-20b
    - internlm/internlm2-chat-7b
    - internlm/internlm2-chat-20b
    - baichuan-inc/Baichuan2-7B-Chat
    - baichuan-inc/Baichuan2-13B-Chat
    - THUDM/chatglm2-6b
    - tiiuae/falcon-7b
    - 01-ai/Yi-6B-Chat
    - internlm/internlm2-1_8b
    - internlm/internlm2-20b
    - Qwen/Qwen1.5-7B-Chat
    - mistralai/Mistral-7B-Instruct-v0.1
    - mistralai/Mixtral-8x7B-Instruct-v0.1
    - google/gemma-7b-it
    - deepseek-ai/deepseek-moe-16b-chat


# Quantization test matrix: each sub-key is a quantization scheme, listing the
# models exercised under that scheme.
# NOTE(review): the key name "quatization_case_config" is a typo for
# "quantization_case_config". It is kept as-is because consumers may read this
# exact key — rename only in lockstep with the test harness.
quatization_case_config:
    w4a16:
        - meta-llama/Llama-2-7b-chat
        - internlm/internlm-chat-20b
        - Qwen/Qwen-7B-Chat
        - Qwen/Qwen-14B-Chat
        - internlm/internlm2-chat-20b
        - baichuan-inc/Baichuan2-7B-Chat
        - internlm/internlm2-20b
    kvint8: # more models are supported kvint8 quantization, but the chat response are not good, already removed
        - meta-llama/Llama-2-7b-chat
        - internlm/internlm-chat-20b
        - internlm/internlm2-chat-20b
    kvint8_w4a16:
        - meta-llama/Llama-2-7b-chat
        - internlm/internlm-chat-20b
        - internlm/internlm2-chat-20b
        - internlm/internlm2-20b
        - Qwen/Qwen-7B-Chat
        - Qwen/Qwen-14B-Chat
        - baichuan-inc/Baichuan2-7B-Chat
    w8a8:
        - meta-llama/Llama-2-7b-chat
        - internlm/internlm-chat-20b
        - internlm/internlm2-chat-20b
        - internlm/internlm2-chat-7b
        - 01-ai/Yi-6B-Chat
        - internlm/internlm2-20b