# dataset-index.yml
# Index of benchmark datasets. Each list item maps a dataset key to its
# display name, category, paper/reference URL, and config path(s).
---
- ifeval:
    name: IFEval
    category: Instruction Following
    paper: https://arxiv.org/pdf/2311.07911
    configpath: opencompass/configs/datasets/IFEval
- nphard:
    name: NPHardEval
    category: Reasoning
    paper: https://arxiv.org/pdf/2312.14890v2
    configpath: opencompass/configs/datasets/NPHardEval
- pmmeval:
    name: PMMEval
    category: Language
    paper: https://arxiv.org/pdf/2411.09116v1
    configpath: opencompass/configs/datasets/PMMEval
# TheoremQA: reasoning benchmark entry (display name, category, paper, config dir).
- theoremqa:
    name: TheoremQA
    category: Reasoning
    paper: https://arxiv.org/pdf/2305.12524
    configpath: opencompass/configs/datasets/TheoremQA
- agieval:
    name: AGIEval
    category: Examination
    paper: https://arxiv.org/pdf/2304.06364
    configpath: opencompass/configs/datasets/agieval
- babilong:
    name: BABILong
    category: Long Context
    paper: https://arxiv.org/pdf/2406.10149
    configpath: opencompass/configs/datasets/babilong
- bigcodebench:
    name: BigCodeBench
    category: Code
    paper: https://arxiv.org/pdf/2406.15877
    configpath: opencompass/configs/datasets/bigcodebench
- calm:
    name: CaLM
    category: Reasoning
    paper: https://arxiv.org/pdf/2405.00622
    configpath: opencompass/configs/datasets/calm
- infinitebench:
    name: InfiniteBench (∞Bench)
    category: Long Context
    paper: https://aclanthology.org/2024.acl-long.814.pdf
    configpath: opencompass/configs/datasets/infinitebench
- korbench:
    name: KOR-Bench
    category: Reasoning
    paper: https://arxiv.org/pdf/2410.06526v1
    configpath: opencompass/configs/datasets/korbench
- lawbench:
    name: LawBench
    category: Knowledge / Law
    paper: https://arxiv.org/pdf/2309.16289
    configpath: opencompass/configs/datasets/lawbench
- leval:
    name: L-Eval
    category: Long Context
    paper: https://arxiv.org/pdf/2307.11088v1
    configpath: opencompass/configs/datasets/leval
- livecodebench:
    name: LiveCodeBench
    category: Code
    paper: https://arxiv.org/pdf/2403.07974
    configpath: opencompass/configs/datasets/livecodebench
- livemathbench:
    name: LiveMathBench
    category: Math
    paper: https://arxiv.org/pdf/2412.13147
    configpath: opencompass/configs/datasets/livemathbench
# LongBench: long-context benchmark entry. The configpath points at the
# benchmark's own config directory (it must not reuse livemathbench's).
- longbench:
    name: LongBench
    category: Long Context
    paper: https://github.com/THUDM/LongBench
    configpath: opencompass/configs/datasets/longbench
- lveval:
    name: LV-Eval
    category: Long Context
    paper: https://arxiv.org/pdf/2402.05136
    configpath: opencompass/configs/datasets/lveval
- medbench:
    name: MedBench
    category: Knowledge / Medicine
    paper: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10778138
    configpath: opencompass/configs/datasets/MedBench
- musr:
    name: MuSR
    category: Reasoning
    paper: https://arxiv.org/pdf/2310.16049
    configpath: opencompass/configs/datasets/musr
- needlebench:
    name: NeedleBench
    category: Long Context
    paper: https://arxiv.org/pdf/2407.11963
    configpath: opencompass/configs/datasets/needlebench
- ruler:
    name: RULER
    category: Long Context
    paper: https://arxiv.org/pdf/2404.06654
    configpath: opencompass/configs/datasets/ruler
- alignment:
    name: AlignBench
    category: Subjective / Alignment
    paper: https://arxiv.org/pdf/2311.18743
    configpath: opencompass/configs/datasets/subjective/alignbench
# AlpacaEval: subjective instruction-following benchmark entry.
- alpaca:
    name: AlpacaEval
    category: Subjective / Instruction Following
    paper: https://github.com/tatsu-lab/alpaca_eval
    configpath: opencompass/configs/datasets/subjective/alpaca_eval
- arenahard:
    name: Arena-Hard
    category: Subjective / Chatbot
    paper: https://lmsys.org/blog/2024-04-19-arena-hard/
    configpath: opencompass/configs/datasets/subjective/arena_hard
- flames:
    name: FLAMES
    category: Subjective / Alignment
    paper: https://arxiv.org/pdf/2311.06899
    configpath: opencompass/configs/datasets/subjective/flames
- fofo:
    name: FOFO
    category: Subjective / Format Following
    paper: https://arxiv.org/pdf/2402.18667
    configpath: opencompass/configs/datasets/subjective/fofo
- followbench:
    name: FollowBench
    category: Subjective / Instruction Following
    paper: https://arxiv.org/pdf/2310.20410
    configpath: opencompass/configs/datasets/subjective/followbench
- hellobench:
    name: HelloBench
    category: Subjective / Long Context
    paper: https://arxiv.org/pdf/2409.16191
    configpath: opencompass/configs/datasets/subjective/hellobench
# JudgerBench entry.
# NOTE(review): `category` is identical to the hellobench entry's
# ("Subjective / Long Context") and may have been copy-pasted — confirm it
# is the intended category for this benchmark.
- judgerbench:
    name: JudgerBench
    category: Subjective / Long Context
    paper: https://arxiv.org/pdf/2410.16256
    configpath: opencompass/configs/datasets/subjective/judgerbench
- multiround:
    name: MT-Bench-101
    category: Subjective / Multi-Round
    paper: https://arxiv.org/pdf/2402.14762
    configpath: opencompass/configs/datasets/subjective/multiround
- wildbench:
    name: WildBench
    category: Subjective / Real Task
    paper: https://arxiv.org/pdf/2406.04770
    configpath: opencompass/configs/datasets/subjective/wildbench
- teval:
    name: T-Eval
    category: Tool Utilization
    paper: https://arxiv.org/pdf/2312.14033
    configpath: opencompass/configs/datasets/teval
# FinanceIQ: finance knowledge benchmark entry.
# NOTE(review): the key `finalceiq` looks like a misspelling of `financeiq`
# (compare `name` and `configpath`). Confirm no consumer looks entries up by
# the current key before renaming it.
- finalceiq:
    name: FinanceIQ
    category: Knowledge / Finance
    paper: https://github.com/Duxiaoman-DI/XuanYuan/tree/main/FinanceIQ
    configpath: opencompass/configs/datasets/FinanceIQ
- gaokaobench:
    name: GAOKAOBench
    category: Examination
    paper: https://arxiv.org/pdf/2305.12474
    configpath: opencompass/configs/datasets/GaokaoBench
- lcbench:
    name: LCBench
    category: Code
    paper: https://github.com/open-compass/CodeBench/
    configpath: opencompass/configs/datasets/LCBench
- MMLUArabic:
    name: ArabicMMLU
    category: Language
    paper: https://arxiv.org/pdf/2402.12840
    configpath: opencompass/configs/datasets/MMLUArabic
- OpenFinData:
    name: OpenFinData
    category: Knowledge / Finance
    paper: https://github.com/open-compass/OpenFinData
    configpath: opencompass/configs/datasets/OpenFinData
- QuALITY:
    name: QuALITY
    category: Long Context
    paper: https://arxiv.org/pdf/2112.08608
    configpath: opencompass/configs/datasets/QuALITY
- advglue:
    name: Adversarial GLUE
    category: Safety
    paper: https://openreview.net/pdf?id=GF9cSKI3A_q
    configpath: opencompass/configs/datasets/adv_glue
# CLUE / AFQMC entry.
# NOTE(review): the key `afqmcd` carries a trailing `d` that neither `name`
# nor `configpath` has — presumably a typo for `afqmc`. Confirm no consumer
# depends on the current key before renaming it.
- afqmcd:
    name: CLUE / AFQMC
    category: Language
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_afqmc
- aime2024:
    name: AIME2024
    category: Examination
    paper: https://huggingface.co/datasets/Maxwell-Jia/AIME_2024
    configpath: opencompass/configs/datasets/aime2024
- anli:
    name: Adversarial NLI
    category: Reasoning
    paper: https://arxiv.org/pdf/1910.14599v2
    configpath: opencompass/configs/datasets/anli
- anthropics_evals:
    name: Anthropics Evals
    category: Safety
    paper: https://arxiv.org/pdf/2212.09251
    configpath: opencompass/configs/datasets/anthropics_evals
- apps:
    name: APPS
    category: Code
    paper: https://arxiv.org/pdf/2105.09938
    configpath: opencompass/configs/datasets/apps
# ARC: reasoning benchmark entry with two config directories (ARC_c, ARC_e).
- arc:
    name: ARC
    category: Reasoning
    paper: https://arxiv.org/pdf/1803.05457
    configpath:
      - opencompass/configs/datasets/ARC_c
      - opencompass/configs/datasets/ARC_e
- arc_prize_public_eval:
    name: ARC Prize
    category: ARC-AGI
    paper: https://arcprize.org/guide#private
    configpath: opencompass/configs/datasets/ARC_Prize_Public_Evaluation
# SuperGLUE / AX: reasoning entry with two config directories (AX_b, AX_g).
- ax:
    name: SuperGLUE / AX
    category: Reasoning
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath:
      - opencompass/configs/datasets/SuperGLUE_AX_b
      - opencompass/configs/datasets/SuperGLUE_AX_g
- bbh:
    name: BIG-Bench Hard
    category: Reasoning
    paper: https://arxiv.org/pdf/2210.09261
    configpath: opencompass/configs/datasets/bbh
- BoolQ:
    name: SuperGLUE / BoolQ
    category: Knowledge
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_BoolQ
- c3:
    name: CLUE / C3 (C³)
    category: Understanding
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_C3
- cb:
    name: SuperGLUE / CB
    category: Reasoning
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_CB
- ceval:
    name: C-EVAL
    category: Examination
    paper: https://arxiv.org/pdf/2305.08322v1
    configpath: opencompass/configs/datasets/ceval
- charm:
    name: CHARM
    category: Reasoning
    paper: https://arxiv.org/pdf/2403.14112
    configpath: opencompass/configs/datasets/CHARM
- chembench:
    name: ChemBench
    category: Knowledge / Chemistry
    paper: https://arxiv.org/pdf/2404.01475
    configpath: opencompass/configs/datasets/ChemBench
- chid:
    name: FewCLUE / CHID
    category: Language
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_chid
- chinese_simpleqa:
    name: Chinese SimpleQA
    category: Knowledge
    paper: https://arxiv.org/pdf/2411.07140
    configpath: opencompass/configs/datasets/chinese_simpleqa
- cibench:
    name: CIBench
    category: Code
    paper: https://www.arxiv.org/pdf/2407.10499
    configpath: opencompass/configs/datasets/CIBench
- civilcomments:
    name: CivilComments
    category: Safety
    paper: https://arxiv.org/pdf/1903.04561
    configpath: opencompass/configs/datasets/civilcomments
- clozeTest_maxmin:
    name: Cloze Test-max/min
    category: Code
    paper: https://arxiv.org/pdf/2102.04664
    configpath: opencompass/configs/datasets/clozeTest_maxmin
- cluewsc:
    name: FewCLUE / CLUEWSC
    category: Language / WSC
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_cluewsc
- cmb:
    name: CMB
    category: Knowledge / Medicine
    paper: https://arxiv.org/pdf/2308.08833
    configpath: opencompass/configs/datasets/cmb
- cmmlu:
    name: CMMLU
    category: Understanding
    paper: https://arxiv.org/pdf/2306.09212
    configpath: opencompass/configs/datasets/cmmlu
- cmnli:
    name: CLUE / CMNLI
    category: Reasoning
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_cmnli
- cmo_fib:
    name: cmo_fib
    category: Examination
    paper: ""
    configpath: opencompass/configs/datasets/cmo_fib
- cmrc:
    name: CLUE / CMRC
    category: Understanding
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_CMRC
- commonsenseqa:
    name: CommonSenseQA
    category: Knowledge
    paper: https://arxiv.org/pdf/1811.00937v2
    configpath: opencompass/configs/datasets/commonsenseqa
- commonsenseqa_cn:
    name: CommonSenseQA-CN
    category: Knowledge
    paper: ""
    configpath: opencompass/configs/datasets/commonsenseqa_cn
- copa:
    name: SuperGLUE / COPA
    category: Reasoning
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_COPA
- crowspairs:
    name: CrowsPairs
    category: Safety
    paper: https://arxiv.org/pdf/2010.00133
    configpath: opencompass/configs/datasets/crowspairs
- crowspairs_cn:
    name: CrowsPairs-CN
    category: Safety
    paper: ""
    configpath: opencompass/configs/datasets/crowspairs_cn
- cvalues:
    name: CVALUES
    category: Safety
    paper: http://xdp-expriment.oss-cn-zhangjiakou.aliyuncs.com/shanqi.xgh/release_github/CValues.pdf
    configpath: opencompass/configs/datasets/cvalues
- drcd:
    name: CLUE / DRCD
    category: Understanding
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_DRCD
- drop:
    name: DROP (DROP Simple Eval)
    category: Understanding
    paper: https://arxiv.org/pdf/1903.00161
    configpath: opencompass/configs/datasets/drop
- ds1000:
    name: DS-1000
    category: Code
    paper: https://arxiv.org/pdf/2211.11501
    configpath: opencompass/configs/datasets/ds1000
- eprstmt:
    name: FewCLUE / EPRSTMT
    category: Understanding
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_eprstmt
- flores:
    name: Flores
    category: Language
    paper: https://aclanthology.org/D19-1632.pdf
    configpath: opencompass/configs/datasets/flores
- game24:
    name: Game24
    category: Math
    paper: https://huggingface.co/datasets/nlile/24-game
    configpath: opencompass/configs/datasets/game24
- govrepcrs:
    name: Government Report Dataset
    category: Long Context
    paper: https://aclanthology.org/2021.naacl-main.112.pdf
    configpath: opencompass/configs/datasets/govrepcrs
- gpqa:
    name: GPQA
    category: Knowledge
    paper: https://arxiv.org/pdf/2311.12022v1
    configpath: opencompass/configs/datasets/gpqa
- gsm8k:
    name: GSM8K
    category: Math
    paper: https://arxiv.org/pdf/2110.14168v2
    configpath: opencompass/configs/datasets/gsm8k
- gsm_hard:
    name: GSM-Hard
    category: Math
    paper: https://proceedings.mlr.press/v202/gao23f/gao23f.pdf
    configpath: opencompass/configs/datasets/gsm_hard
- hle:
    name: HLE(Humanity's Last Exam)
    category: Reasoning
    paper: https://lastexam.ai/paper
    configpath: opencompass/configs/datasets/HLE
- hellaswag:
    name: HellaSwag
    category: Reasoning
    paper: https://arxiv.org/pdf/1905.07830
    configpath: opencompass/configs/datasets/hellaswag
- humaneval:
    name: HumanEval
    category: Code
    paper: https://arxiv.org/pdf/2107.03374v2
    configpath: opencompass/configs/datasets/humaneval
- humaneval_cn:
    name: HumanEval-CN
    category: Code
    paper: ""
    configpath: opencompass/configs/datasets/humaneval_cn
- humaneval_multi:
    name: Multi-HumanEval
    category: Code
    paper: https://arxiv.org/pdf/2210.14868
    configpath: opencompass/configs/datasets/humaneval_multi
- humanevalx:
    name: HumanEval-X
    category: Code
    paper: https://dl.acm.org/doi/pdf/10.1145/3580305.3599790
    configpath: opencompass/configs/datasets/humanevalx
- hungarian_math:
    name: Hungarian_Math
    category: Math
    paper: https://huggingface.co/datasets/keirp/hungarian_national_hs_finals_exam
    configpath: opencompass/configs/datasets/hungarian_exam
- iwslt2017:
    name: IWSLT2017
    category: Language
    paper: https://cris.fbk.eu/bitstream/11582/312796/1/iwslt17-overview.pdf
    configpath: opencompass/configs/datasets/iwslt2017
- jigsawmultilingual:
    name: JigsawMultilingual
    category: Safety
    paper: https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
    configpath: opencompass/configs/datasets/jigsawmultilingual
- lambada:
    name: LAMBADA
    category: Understanding
    paper: https://arxiv.org/pdf/1606.06031
    configpath: opencompass/configs/datasets/lambada
- lcsts:
    name: LCSTS
    category: Understanding
    paper: https://aclanthology.org/D15-1229.pdf
    configpath: opencompass/configs/datasets/lcsts
# LiveStemBench entry.
# NOTE(review): `category` and `paper` are empty strings here; every other
# entry in this index has a non-empty category — fill these in once known.
- livestembench:
    name: LiveStemBench
    category: ""
    paper: ""
    configpath: opencompass/configs/datasets/livestembench
- llm_compression:
    name: LLM Compression
    category: Bits Per Character (BPC)
    paper: https://arxiv.org/pdf/2404.09937
    configpath: opencompass/configs/datasets/llm_compression
- math:
    name: MATH
    category: Math
    paper: https://arxiv.org/pdf/2103.03874
    configpath: opencompass/configs/datasets/math
- math401:
    name: MATH 401
    category: Math
    paper: https://arxiv.org/pdf/2304.02015
    configpath: opencompass/configs/datasets/math401
- mathbench:
    name: MathBench
    category: Math
    paper: https://arxiv.org/pdf/2405.12209
    configpath: opencompass/configs/datasets/mathbench
- mbpp:
    name: MBPP
    category: Code
    paper: https://arxiv.org/pdf/2108.07732
    configpath: opencompass/configs/datasets/mbpp
- mbpp_cn:
    name: MBPP-CN
    category: Code
    paper: ""
    configpath: opencompass/configs/datasets/mbpp_cn
- mbpp_plus:
    name: MBPP-PLUS
    category: Code
    paper: ""
    configpath: opencompass/configs/datasets/mbpp_plus
- mgsm:
    name: MGSM
    category: Language / Math
    paper: https://arxiv.org/pdf/2210.03057
    configpath: opencompass/configs/datasets/mgsm
- mmlu:
    name: MMLU
    category: Understanding
    paper: https://arxiv.org/pdf/2009.03300
    configpath: opencompass/configs/datasets/mmlu
- mmlu_cf:
    name: MMLU-CF
    category: Understanding
    paper: https://arxiv.org/pdf/2412.15194
    configpath: opencompass/configs/datasets/mmlu_cf
- mmlu_pro:
    name: MMLU-Pro
    category: Understanding
    paper: https://arxiv.org/pdf/2406.01574
    configpath: opencompass/configs/datasets/mmlu_pro
- mmmlu:
    name: MMMLU
    category: Language / Understanding
    paper: https://huggingface.co/datasets/openai/MMMLU
    configpath: opencompass/configs/datasets/mmmlu
- multirc:
    name: SuperGLUE / MultiRC
    category: Understanding
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_MultiRC
- narrativeqa:
    name: NarrativeQA
    category: Understanding
    paper: https://github.com/google-deepmind/narrativeqa
    configpath: opencompass/configs/datasets/narrativeqa
- natural_question:
    name: NaturalQuestions
    category: Knowledge
    paper: https://github.com/google-research-datasets/natural-questions
    configpath: opencompass/configs/datasets/nq
- natural_question_cn:
    name: NaturalQuestions-CN
    category: Knowledge
    paper: ""
    configpath: opencompass/configs/datasets/nq_cn
- obqa:
    name: OpenBookQA
    category: Knowledge
    paper: https://arxiv.org/pdf/1809.02789v1
    configpath: opencompass/configs/datasets/obqa
# PIQA: physics-knowledge benchmark entry. Its display name is its own and
# must not repeat the obqa entry's "OpenBookQA".
- piqa:
    name: PIQA
    category: Knowledge / Physics
    paper: https://arxiv.org/pdf/1911.11641v1
    configpath: opencompass/configs/datasets/piqa
- py150:
    name: py150
    category: Code
    paper: https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/CodeCompletion-line
    configpath: opencompass/configs/datasets/py150
- qasper:
    name: Qasper
    category: Long Context
    paper: https://arxiv.org/pdf/2105.03011
    configpath: opencompass/configs/datasets/qasper
- qaspercut:
    name: Qasper-Cut
    category: Long Context
    paper: ""
    configpath: opencompass/configs/datasets/qaspercut
- race:
    name: RACE
    category: Examination
    paper: https://arxiv.org/pdf/1704.04683
    configpath: opencompass/configs/datasets/race
- realtoxicprompts:
    name: RealToxicPrompts
    category: Safety
    paper: https://arxiv.org/pdf/2009.11462
    configpath: opencompass/configs/datasets/realtoxicprompts
- record:
    name: SuperGLUE / ReCoRD
    category: Understanding
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_ReCoRD
- rte:
    name: SuperGLUE / RTE
    category: Reasoning
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_RTE
- ocnli:
    name: CLUE / OCNLI
    category: Reasoning
    paper: https://arxiv.org/pdf/2004.05986
    configpath: opencompass/configs/datasets/CLUE_ocnli
- rolebench:
    name: RoleBench
    category: Role Play
    paper: https://arxiv.org/pdf/2310.00746
    configpath: opencompass/configs/datasets/rolebench
- s3eval:
    name: S3Eval
    category: Long Context
    paper: https://aclanthology.org/2024.naacl-long.69.pdf
    configpath: opencompass/configs/datasets/s3eval
- scibench:
    name: SciBench
    category: Reasoning
    paper: https://sxkdz.github.io/files/publications/ICML/SciBench/SciBench.pdf
    configpath: opencompass/configs/datasets/scibench
- scicode:
    name: SciCode
    category: Code
    paper: https://arxiv.org/pdf/2407.13168
    configpath: opencompass/configs/datasets/scicode
- simpleqa:
    name: SimpleQA
    category: Knowledge
    paper: https://arxiv.org/pdf/2411.04368
    configpath: opencompass/configs/datasets/SimpleQA
- siqa:
    name: SocialIQA
    category: Reasoning
    paper: https://arxiv.org/pdf/1904.09728
    configpath: opencompass/configs/datasets/siqa
- squad20:
    name: SQuAD2.0
    category: Understanding
    paper: https://arxiv.org/pdf/1806.03822
    configpath: opencompass/configs/datasets/squad20
- storycloze:
    name: StoryCloze
    category: Reasoning
    paper: https://aclanthology.org/2022.emnlp-main.616.pdf
    configpath: opencompass/configs/datasets/storycloze
- strategyqa:
    name: StrategyQA
    category: Reasoning
    paper: https://arxiv.org/pdf/2101.02235
    configpath: opencompass/configs/datasets/strategyqa
- summedits:
    name: SummEdits
    category: Language
    paper: https://aclanthology.org/2023.emnlp-main.600.pdf
    configpath: opencompass/configs/datasets/summedits
- summscreen:
    name: SummScreen
    category: Understanding
    paper: https://arxiv.org/pdf/2104.07091v1
    configpath: opencompass/configs/datasets/summscreen
- svamp:
    name: SVAMP
    category: Math
    paper: https://aclanthology.org/2021.naacl-main.168.pdf
    configpath: opencompass/configs/datasets/SVAMP
- tabmwp:
    name: TabMWP
    category: Math / Table
    paper: https://arxiv.org/pdf/2209.14610
    configpath: opencompass/configs/datasets/TabMWP
- taco:
    name: TACO
    category: Code
    paper: https://arxiv.org/pdf/2312.14852
    configpath: opencompass/configs/datasets/taco
- tnews:
    name: FewCLUE / TNEWS
    category: Understanding
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_tnews
- bustm:
    name: FewCLUE / BUSTM
    category: Reasoning
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_bustm
- csl:
    name: FewCLUE / CSL
    category: Understanding
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_csl
- ocnli_fc:
    name: FewCLUE / OCNLI-FC
    category: Reasoning
    paper: https://arxiv.org/pdf/2107.07498
    configpath: opencompass/configs/datasets/FewCLUE_ocnli_fc
- triviaqa:
    name: TriviaQA
    category: Knowledge
    paper: https://arxiv.org/pdf/1705.03551v2
    configpath: opencompass/configs/datasets/triviaqa
- triviaqarc:
    name: TriviaQA-RC
    category: Knowledge / Understanding
    paper: ""
    configpath: opencompass/configs/datasets/triviaqarc
- truthfulqa:
    name: TruthfulQA
    category: Safety
    paper: https://arxiv.org/pdf/2109.07958v2
    configpath: opencompass/configs/datasets/truthfulqa
- tydiqa:
    name: TyDi-QA
    category: Language
    paper: https://storage.googleapis.com/tydiqa/tydiqa.pdf
    configpath: opencompass/configs/datasets/tydiqa
- wic:
    name: SuperGLUE / WiC
    category: Language
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_WiC
- wsc:
    name: SuperGLUE / WSC
    category: Language / WSC
    paper: https://proceedings.neurips.cc/paper_files/paper/2019/file/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf
    configpath: opencompass/configs/datasets/SuperGLUE_WSC
- winogrande:
    name: WinoGrande
    category: Language / WSC
    paper: https://arxiv.org/pdf/1907.10641v2
    configpath: opencompass/configs/datasets/winogrande
- xcopa:
    name: XCOPA
    category: Language
    paper: https://arxiv.org/pdf/2005.00333
    configpath: opencompass/configs/datasets/XCOPA
- xiezhi:
    name: Xiezhi
    category: Knowledge
    paper: https://arxiv.org/pdf/2306.05783
    configpath: opencompass/configs/datasets/xiezhi
- xlsum:
    name: XLSum
    category: Understanding
    paper: https://arxiv.org/pdf/2106.13822v1
    configpath: opencompass/configs/datasets/XLSum
- xsum:
    name: Xsum
    category: Understanding
    paper: https://arxiv.org/pdf/1808.08745
    configpath: opencompass/configs/datasets/Xsum