__init__.py 5.25 KB
Newer Older
&'s avatar
& committed
1
2
from pprint import pprint

&'s avatar
& committed
3
4
import sacrebleu

Jason Phang's avatar
Jason Phang committed
5
6
from . import superglue
from . import glue
Leo Gao's avatar
Leo Gao committed
7
from . import arc
thefazzer's avatar
thefazzer committed
8
from . import coqa
Leo Gao's avatar
Leo Gao committed
9
from . import race
Leo Gao's avatar
Leo Gao committed
10
from . import webqs
Leo Gao's avatar
Leo Gao committed
11
from . import anli
12
from . import wsc273
Charles Foster's avatar
Charles Foster committed
13
from . import winogrande
Charles Foster's avatar
Charles Foster committed
14
from . import quac
Charles Foster's avatar
Charles Foster committed
15
from . import hellaswag
Charles Foster's avatar
Charles Foster committed
16
from . import openbookqa
Charles Foster's avatar
Charles Foster committed
17
from . import squad
18
from . import naturalqs
19
from . import sat
20
from . import arithmetic
Leo Gao's avatar
Leo Gao committed
21
from . import lambada
Jon Tow's avatar
Jon Tow committed
22
from . import race
Leo Gao's avatar
Leo Gao committed
23
from . import piqa
24
from . import triviaqa
jeffhsu3's avatar
jeffhsu3 committed
25
from . import pubmedqa
26
from . import sciq
27
from . import webqs
28
from . import qa4mre
&'s avatar
& committed
29
from . import translation
30
31
from . import headqa
from . import mathqa
Muennighoff's avatar
Muennighoff committed
32
from . import ethics
Jon Tow's avatar
Jon Tow committed
33
from . import drop
34
from . import unscramble
35
from . import logiqa
Andy Zou's avatar
Andy Zou committed
36
from . import hendrycks_test
Eric Tang's avatar
Eric Tang committed
37
from . import math
Jason Phang's avatar
gpt3  
Jason Phang committed
38

&'s avatar
& committed
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
########################################
# Translation tasks
########################################

# 6 total
# Test set name -> language pairs (2 + 4 = 6 pairs).
gpt3_translation_benchmarks = {
    # French
    "wmt14": ["en-fr", "fr-en"],
    # German, Romanian
    "wmt16": ["en-ro", "ro-en", "de-en", "en-de"],
}


# The six pairs above plus every wmt20 pair sacrebleu reports and two
# iwslt17 Arabic pairs.  The original note says 28 total; the exact count
# depends on what the installed sacrebleu returns for wmt20.
selected_translation_benchmarks = dict(
    gpt3_translation_benchmarks,
    wmt20=sacrebleu.get_langpairs_for_testset("wmt20"),
    iwslt17=["en-ar", "ar-en"],  # Arabic
)

# Every test set sacrebleu knows about, mapped to all of its language pairs.
# The original note says 319 total; this depends on the sacrebleu version.
all_translation_benchmarks = {
    testset: sacrebleu.get_langpairs_for_testset(testset)
    for testset in sacrebleu.get_available_testsets()
}


########################################
# All tasks
########################################
Jason Phang's avatar
gpt3  
Jason Phang committed
67
68


Jason Phang's avatar
Jason Phang committed
69
# Maps each public task name to the object that builds the task.
# Later entries (including the ``**`` spreads) override earlier ones that
# share a key, so the order below is significant and is kept as written.
TASK_REGISTRY = {
    # GLUE
    "cola": glue.CoLA,
    "mnli": glue.MNLI,
    "mnli_mismatched": glue.MNLIMismatched,
    "mrpc": glue.MRPC,
    "rte": glue.RTE,
    "qnli": glue.QNLI,
    "qqp": glue.QQP,
    # "stsb": glue.STSB,  # not implemented yet
    "sst": glue.SST,
    "wnli": glue.WNLI,
    # SuperGLUE
    "boolq": superglue.BoolQ,
    "cb": superglue.CommitmentBank,
    "copa": superglue.Copa,
    "multirc": superglue.MultiRC,
    "record": superglue.ReCoRD,
    "wic": superglue.WordsInContext,
    "wsc": superglue.SGWinogradSchemaChallenge,

    # Order by benchmark/genre?
    "coqa": coqa.CoQA,
    "drop": drop.DROP,
    "lambada": lambada.LAMBADA,
    "piqa": piqa.PiQA,

    # Science related
    "pubmedqa": pubmedqa.Pubmed_QA,
    "sciq": sciq.SciQ,
    # "qa4mre": qa4mre.QA4MRE,
    "qa4mre_2011": qa4mre.QA4MRE_2011,
    "qa4mre_2012": qa4mre.QA4MRE_2012,
    "qa4mre_2013": qa4mre.QA4MRE_2013,

    # "triviaqa": triviaqa.TriviaQA,
    "arc_easy": arc.ARCEasy,
    "arc_challenge": arc.ARCChallenge,
    # "quac": quac.QuAC,  # not implemented yet
    "logiqa": logiqa.LogiQA,
    # NOTE(review): this entry used to carry a "not implemented yet" comment
    # even though it is registered and therefore selectable -- confirm status.
    "hellaswag": hellaswag.HellaSwag,
    "openbookqa": openbookqa.OpenBookQA,
    # "sat": sat.SATAnalogies,  # not implemented yet
    "squad2": squad.SQuAD2,
    "race": race.RACE,
    # "naturalqs": naturalqs.NaturalQs,  # not implemented yet
    "headqa": headqa.HeadQA,
    "mathqa": mathqa.MathQA,
    "webqs": webqs.WebQs,
    "wsc273": wsc273.WinogradSchemaChallenge273,
    "winogrande": winogrande.Winogrande,
    "anli_r1": anli.ANLIRound1,
    "anli_r2": anli.ANLIRound2,
    "anli_r3": anli.ANLIRound3,

    "ethics_cm": ethics.EthicsCM,
    "ethics_deontology": ethics.EthicsDeontology,
    "ethics_justice": ethics.EthicsJustice,
    "ethics_utilitarianism_original": ethics.EthicsUtilitarianismOriginal,
    "ethics_utilitarianism": ethics.EthicsUtilitarianism,
    "ethics_virtue": ethics.EthicsVirtue,

    # math
    "math_algebra": math.MathAlgebra,
    "math_counting_and_prob": math.MathCountingAndProbability,
    "math_geometry": math.MathGeometry,
    "math_intermediate_algebra": math.MathIntermediateAlgebra,
    "math_num_theory": math.MathNumberTheory,
    "math_prealgebra": math.MathPrealgebra,
    "math_precalc": math.MathPrecalculus,

    # arithmetic
    "arithmetic_2da": arithmetic.Arithmetic2DPlus,
    "arithmetic_2ds": arithmetic.Arithmetic2DMinus,
    "arithmetic_3da": arithmetic.Arithmetic3DPlus,
    "arithmetic_3ds": arithmetic.Arithmetic3DMinus,
    "arithmetic_4da": arithmetic.Arithmetic4DPlus,
    "arithmetic_4ds": arithmetic.Arithmetic4DMinus,
    "arithmetic_5da": arithmetic.Arithmetic5DPlus,
    "arithmetic_5ds": arithmetic.Arithmetic5DMinus,
    "arithmetic_2dm": arithmetic.Arithmetic2DMultiplication,
    "arithmetic_1dc": arithmetic.Arithmetic1DComposite,
    # TODO Perhaps make these groups of tasks
    #   e.g. anli, arithmetic, openai_translations, harness_translations

    # hendrycksTest (57 tasks)
    **hendrycks_test.create_all_tasks(),

    # e.g. wmt14-fr-en
    **translation.create_tasks_from_benchmarks(gpt3_translation_benchmarks),
    # chef's selection, mostly wmt20
    **translation.create_tasks_from_benchmarks(selected_translation_benchmarks),

    # Word Scrambling and Manipulation Tasks
    "anagrams1": unscramble.Anagrams1,
    "anagrams2": unscramble.Anagrams2,
    "cycle_letters": unscramble.CycleLetters,
    "random_insertion": unscramble.RandomInsertion,
    "reversed_words": unscramble.ReversedWords,
}
Jason Phang's avatar
gpt3  
Jason Phang committed
169
170


Jason Phang's avatar
Jason Phang committed
171
# Sorted list of every registered task name.  Iterating a dict yields its
# keys, so no intermediate list() is needed before sorting.
ALL_TASKS = sorted(TASK_REGISTRY)
Jason Phang's avatar
Jason Phang committed
172
173


Jason Phang's avatar
cleanup  
Jason Phang committed
174
def get_task(task_name):
    """Return the registry entry for ``task_name``.

    Args:
        task_name: Key to look up in ``TASK_REGISTRY``.

    Returns:
        Whatever object ``TASK_REGISTRY`` stores under ``task_name``.  The
        entry is returned as-is; it is not called or instantiated here.

    Raises:
        KeyError: If ``task_name`` is not a key of ``TASK_REGISTRY``.  The
            full registry is printed to stdout first as a lookup aid.
    """
    try:
        return TASK_REGISTRY[task_name]
    except KeyError:
        print("Available tasks:")
        pprint(TASK_REGISTRY)
        # ``from None`` drops the implicit "During handling of the above
        # exception..." chain; the replacement message already names the key.
        raise KeyError(f"Missing task {task_name}") from None
Jason Phang's avatar
cleanup  
Jason Phang committed
181
182
183
184
185
186
187


def get_task_dict(task_name_list):
    """Build a ``{task_name: task_instance}`` mapping for the given names.

    Each name is resolved through ``get_task`` and the returned object is
    called with no arguments to create the value.  Names are processed in
    iteration order; a repeated name is resolved and instantiated again and
    the later instance replaces the earlier one.

    Args:
        task_name_list: Iterable of task names.

    Returns:
        dict mapping each name to the object produced by calling its
        registry entry.

    Raises:
        KeyError: Propagated from ``get_task`` for an unknown name.
    """
    instances = {}
    for name in task_name_list:
        task_factory = get_task(name)
        instances[name] = task_factory()
    return instances