{
  "language": "cn",
  "category": {
    "brainstorming": {
      "GPT": [
        "language organization",
        "relevance",
        "creativity",
        "practicality",
        "reasonableness"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "chat": {
      "GPT": [
        "language organization",
        "naturalness",
        "engagingness",
        "fidelity"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "classification": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Precision",
        "Recall",
        "F1 score",
        "CHRF"
      ]
    },
    "closed_qa": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore",
        "CHRF"
      ]
    },
    "extraction": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Precision",
        "Recall",
        "F1 score",
        "CHRF"
      ]
    },
    "generation": {
      "GPT": [
        "language organization",
        "relevance",
        "diversity"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore"
      ]
    },
    "logical_reasoning": {
      "GPT": [
        "correctness",
        "relevance",
        "reasonableness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore",
        "CHRF"
      ]
    },
    "open_qa": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "rewriting": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore"
      ]
    },
    "roleplay": {
      "GPT": [
        "language organization",
        "relevance",
        "fidelity",
        "creativity"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "summarization": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness",
        "conciseness"
      ],
      "Metrics": [
      ]
    },
    "Finance": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "Law": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "Education": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "Medical": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "STEM": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "SocialScience": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "Humanity": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "Other": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    },
    "ethics": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
      ]
    }
  }
}