cibench.py 20.3 KB
Newer Older
Hubert's avatar
Hubert committed
1

klein's avatar
klein committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Accumulator for every CIBench summary group defined in this module.
cibench_summary_groups = []

# Libraries covered by the CIBench generation subsets, and the subset
# identifier derived from each one (same order).
_cibench_generation_modules = ['pandas', 'matplotlib', 'opencv', 'scipy', 'seaborn', 'pytorch']
_cibench_generation = [f'cibench_generation/{module}' for module in _cibench_generation_modules]

# Per-module weights, four columns per row:
#   column 0 -> used for both tool_rate and executable
#   column 1 -> numeric_correct
#   column 2 -> text_score
#   column 3 -> vis_sim
# NOTE(review): presumably per-metric question counts -- confirm.
_cibench_generation_weight = {
    'matplotlib': [223, 50, 1, 156],
    'pandas': [200, 45, 45, 38],
    'pytorch': [69, 0, 8, 11],
    'seaborn': [130, 0, 2, 106],
    'opencv': [177, 21, 6, 106],
    'scipy': [161, 94, 14, 49],
}

# One summary group per metric; each group pairs every generation subset with
# the metric and weights it by the matching column of the table above.
cibench_summary_groups.extend(
    {
        'name': f'cibench_generation:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_generation],
        'weights': {
            f'cibench_generation/{module}': row[column]
            for module, row in _cibench_generation_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)
40

klein's avatar
klein committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Rebind the generation subset list to the oracle variant; the module list and
# the weight table are the ones already defined for the plain generation run.
_cibench_generation = [f'cibench_generation_oracle/{module}' for module in _cibench_generation_modules]

# One summary group per metric. Weight column 0 serves both tool_rate and
# executable; columns 1-3 serve numeric_correct, text_score and vis_sim.
cibench_summary_groups.extend(
    {
        'name': f'cibench_generation_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_generation],
        'weights': {
            f'cibench_generation_oracle/{module}': row[column]
            for module, row in _cibench_generation_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

# Libraries covered by the CIBench template subsets, and the subset
# identifier derived from each one (same order).
_cibench_template_modules = [
    'lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
    'scipy', 'seaborn', 'sklearn', 'tensorflow',
]
_cibench_template = [f'cibench_template/{module}' for module in _cibench_template_modules]

# number of total exec questions in this module
# Four columns per row:
#   column 0 -> used for both tool_rate and executable
#   column 1 -> numeric_correct
#   column 2 -> text_score
#   column 3 -> vis_sim
_cibench_template_weight = {
    'lightgbm': [30, 15, 0, 0],
    'matplotlib': [42, 0, 0, 36],
    'nltk': [70, 30, 20, 10],
    'opencv': [60, 10, 0, 40],
    'pandas': [60, 40, 0, 10],
    'pytorch': [28, 0, 0, 0],
    'scipy': [60, 40, 0, 0],
    'seaborn': [42, 0, 0, 35],
    'sklearn': [42, 6, 0, 18],
    'tensorflow': [36, 6, 0, 12],
}

# One summary group per metric; each group pairs every template subset with
# the metric and weights it by the matching column of the table above.
cibench_summary_groups.extend([
    {
        'name': f'cibench_template:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template],
        'weights': {
            f'cibench_template/{module}': row[column]
            for module, row in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
])

klein's avatar
klein committed
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Oracle variant of the template subsets; module list and weight table are
# shared with the plain template run.
_cibench_template_oracle = [f'cibench_template_oracle/{module}' for module in _cibench_template_modules]

# One summary group per metric. Weight column 0 serves both tool_rate and
# executable; columns 1-3 serve numeric_correct, text_score and vis_sim.
cibench_summary_groups.extend(
    {
        'name': f'cibench_template_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_oracle],
        'weights': {
            f'cibench_template_oracle/{module}': row[column]
            for module, row in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

143
144

## chinese
klein's avatar
klein committed
145
# Libraries covered by the Chinese template subsets, and the subset identifier
# derived from each one (same order).
_cibench_template_cn_modules = [
    'lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
    'scipy', 'seaborn', 'sklearn', 'tensorflow',
]
_cibench_template_cn = [f'cibench_template_chinese/{module}' for module in _cibench_template_cn_modules]

# One summary group per metric. Weights are read from the shared template
# weight table: column 0 serves both tool_rate and executable; columns 1-3
# serve numeric_correct, text_score and vis_sim.
cibench_summary_groups.extend([
    {
        'name': f'cibench_template_cn:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_cn],
        'weights': {
            f'cibench_template_chinese/{module}': row[column]
            for module, row in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
])

klein's avatar
klein committed
176
# Oracle variant of the Chinese template subsets, one identifier per module in
# the Chinese template module list.
_cibench_template_cn_oracle = [
    f'cibench_template_oracle_chinese/{module}' for module in _cibench_template_cn_modules
]

# One summary group per metric. Weights are read from the shared template
# weight table: column 0 serves both tool_rate and executable; columns 1-3
# serve numeric_correct, text_score and vis_sim.
cibench_summary_groups.extend([
    {
        'name': f'cibench_template_cn_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_cn_oracle],
        'weights': {
            f'cibench_template_oracle_chinese/{module}': row[column]
            for module, row in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
])

klein's avatar
klein committed
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267

########### New summarizer for Category metric

# Category tables. Every row is [subset path, metric name, weight]; for each
# (suite, module) member the three scored metrics are emitted in the order
# numeric_correct, text_score, vis_sim, weighted by columns 1, 2 and 3 of that
# suite's weight table.
cibench_data_manipulation = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation', _cibench_generation_weight, 'pandas'),
        ('cibench_template', _cibench_template_weight, 'pandas'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_data_visualization = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation', _cibench_generation_weight, 'matplotlib'),
        ('cibench_generation', _cibench_generation_weight, 'seaborn'),
        ('cibench_template', _cibench_template_weight, 'matplotlib'),
        ('cibench_template', _cibench_template_weight, 'seaborn'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_modeling = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation', _cibench_generation_weight, 'pytorch'),
        ('cibench_template', _cibench_template_weight, 'pytorch'),
        ('cibench_template', _cibench_template_weight, 'sklearn'),
        ('cibench_template', _cibench_template_weight, 'tensorflow'),
        ('cibench_template', _cibench_template_weight, 'lightgbm'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_nlp = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_template', _cibench_template_weight, 'nltk'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_ip = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation', _cibench_generation_weight, 'opencv'),
        ('cibench_template', _cibench_template_weight, 'opencv'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_math = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation', _cibench_generation_weight, 'scipy'),
        ('cibench_template', _cibench_template_weight, 'scipy'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
268
269
# One ':scores' summary group per category table. Each table row is
# [subset path, metric name, weight]: the first two fields form the subset
# entry, and the weight (last field) is keyed by 'subset@metric'.
cibench_summary_groups.extend([
    {
        'name': name,
        'subsets': [row[:2] for row in rows],
        'weights': {f'{row[0]}@{row[1]}': row[-1] for row in rows},
    }
    for name, rows in (
        ('cibench_data_manipulation:scores', cibench_data_manipulation),
        ('cibench_data_visualization:scores', cibench_data_visualization),
        ('cibench_modeling:scores', cibench_modeling),
        ('cibench_nlp:scores', cibench_nlp),
        ('cibench_ip:scores', cibench_ip),
        ('cibench_math:scores', cibench_math),
    )
])
klein's avatar
klein committed
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394


########### New summarizer for Category metric oracle

# Oracle category tables. These assignments rebind the same six names used for
# the non-oracle categories earlier in this file. Every row is
# [subset path, metric name, weight]; for each (suite, module) member the three
# scored metrics are emitted in the order numeric_correct, text_score, vis_sim,
# weighted by columns 1, 2 and 3 of the matching (non-oracle) weight table.
cibench_data_manipulation = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation_oracle', _cibench_generation_weight, 'pandas'),
        ('cibench_template_oracle', _cibench_template_weight, 'pandas'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_data_visualization = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation_oracle', _cibench_generation_weight, 'matplotlib'),
        ('cibench_generation_oracle', _cibench_generation_weight, 'seaborn'),
        ('cibench_template_oracle', _cibench_template_weight, 'matplotlib'),
        ('cibench_template_oracle', _cibench_template_weight, 'seaborn'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_modeling = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation_oracle', _cibench_generation_weight, 'pytorch'),
        ('cibench_template_oracle', _cibench_template_weight, 'pytorch'),
        ('cibench_template_oracle', _cibench_template_weight, 'sklearn'),
        ('cibench_template_oracle', _cibench_template_weight, 'tensorflow'),
        ('cibench_template_oracle', _cibench_template_weight, 'lightgbm'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_nlp = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_template_oracle', _cibench_template_weight, 'nltk'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_ip = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation_oracle', _cibench_generation_weight, 'opencv'),
        ('cibench_template_oracle', _cibench_template_weight, 'opencv'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_math = [
    [f'{suite}/{module}', metric, table[module][column]]
    for suite, table, module in (
        ('cibench_generation_oracle', _cibench_generation_weight, 'scipy'),
        ('cibench_template_oracle', _cibench_template_weight, 'scipy'),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
# One '_oracle:scores' summary group per category table, built from the six
# category lists as they are bound at this point in the module. Each table row
# is [subset path, metric name, weight]: the first two fields form the subset
# entry, and the weight (last field) is keyed by 'subset@metric'.
# Only dict entries are appended; the groups list must not receive any
# non-dict element.
cibench_summary_groups.extend([
    {
        'name': name,
        'subsets': [row[:2] for row in rows],
        'weights': {f'{row[0]}@{row[1]}': row[-1] for row in rows},
    }
    for name, rows in (
        ('cibench_data_manipulation_oracle:scores', cibench_data_manipulation),
        ('cibench_data_visualization_oracle:scores', cibench_data_visualization),
        ('cibench_modeling_oracle:scores', cibench_modeling),
        ('cibench_nlp_oracle:scores', cibench_nlp),
        ('cibench_ip_oracle:scores', cibench_ip),
        ('cibench_math_oracle:scores', cibench_math),
    )
])