make_table_tasks.py 5.36 KB
Newer Older
jon-tow's avatar
jon-tow committed
1
2
"""
Usage:
   Writes csv and Markdown table to csv_file, md_file (below).
"""
import logging
baberabb's avatar
test  
baberabb committed
6
import os
baberabb's avatar
baberabb committed
7
from pathlib import Path
baberabb's avatar
test  
baberabb committed
8
from typing import List, Union
9

baberabb's avatar
baberabb committed
10
11
import datasets
import pandas as pd
12
from tqdm import tqdm
Leo Gao's avatar
Leo Gao committed
13

14
from lm_eval import tasks
baberabb's avatar
test  
baberabb committed
15
from lm_eval.utils import load_yaml_config
16

Leo Gao's avatar
Leo Gao committed
17

jon-tow's avatar
jon-tow committed
18
19
# Show INFO-level records so the per-task rows logged by maketable() are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Turn off `datasets` caching for this script run.
datasets.disable_caching()
# Shared task registry used by new_tasks() and maketable().
task_manager = tasks.TaskManager()
Leo Gao's avatar
Leo Gao committed
22

Fabrizio Milo's avatar
Fabrizio Milo committed
23

baberabb's avatar
test  
baberabb committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def load_changed_files(file_path: str) -> List[str]:
    """Return the whitespace-separated entries stored in ``file_path``.

    Args:
        file_path: Path of a text file whose entries are separated by any
            whitespace (spaces, tabs or newlines).

    Returns:
        The entries in file order; an empty list when the file is empty or
        contains only whitespace.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding so the result does not depend on the platform's
    # locale default.
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read().split()


def parser(full_path: List[str]) -> List[str]:
    """Map changed file paths to the task names declared in their YAML configs.

    Args:
        full_path: File paths to inspect. A path ending in ".yaml" is loaded
            directly; a path ending in ".py" stands for every "*.yaml" file
            located in the same directory. Any other path is ignored.

    Returns:
        The distinct ``"task"`` values read from the selected YAML configs,
        in no particular order.
    """
    task_names = set()
    for changed in full_path:
        if changed.endswith(".yaml"):
            yaml_paths = [changed]
        elif changed.endswith(".py"):
            # A changed Python file affects every config that sits next to it.
            yaml_paths = [str(p) for p in Path(changed).parent.glob("*.yaml")]
        else:
            continue
        for yaml_path in yaml_paths:
            task_names.add(load_yaml_config(yaml_path)["task"])
    return list(task_names)


42
43
def new_tasks(df=None) -> Union[List[str], None]:
    """Collect the task names whose table rows need to be (re)generated.

    Two sources are combined:

    * the changed-files list at
      ``.github/outputs/tasks_all_changed_and_modified_files.txt``, when that
      file exists (for when run in CI): its entries are parsed into task
      names via ``parser(load_changed_files(...))``;
    * when ``df`` is given, every name in ``task_manager.all_tasks`` that is
      not already present in the table's "Task Name" column.

    Args:
        df: Optional existing (partial) task table; must expose a
            "Task Name" column.

    Returns:
        The selected task names without duplicates, in first-seen order.
        The list is empty when the changed-files list is absent and ``df``
        is ``None``.
    """
    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
    selected = []
    if os.path.exists(FILENAME):
        # If tasks folder has changed then we get the list of files from
        # FILENAME and parse the yaml files to get the task names.
        selected.extend(parser(load_changed_files(FILENAME)))
    # If we already have a (partial) task table created, only add tasks
    # which aren't already in the task table.
    if df is not None:
        # Build the lookup once instead of scanning the column per task.
        known = set(df["Task Name"].values)
        selected.extend(k for k in task_manager.all_tasks if k not in known)
    # A task can come from both sources; dict.fromkeys drops the repeats
    # while keeping first-seen order.
    return list(dict.fromkeys(selected))
baberabb's avatar
test  
baberabb committed
59
60


jon-tow's avatar
jon-tow committed
61
def check(tf):
    """Return "✓" when ``tf`` is truthy and a single space otherwise."""
    return "✓" if tf else " "

Leo Gao's avatar
Leo Gao committed
67

baberabb's avatar
baberabb committed
68
69
70
71
def maketable(df):
    """Build the task table, or refresh an existing one, and render it.

    Args:
        df: ``None`` to build the table from every name in
            ``task_manager.all_tasks``; otherwise an existing table (a
            DataFrame with a "Task Name" column) that is refreshed with the
            names returned by ``new_tasks(df=df)``.

    Returns:
        A ``(df, table)`` tuple: the resulting DataFrame and its Markdown
        rendering, ``df.to_markdown(index=False)``.
    """
    headers = [
        "Task Name",
        "Group",
        "Request Type",
        "Filters",
        "Metrics",
    ]
    if df is None:
        _tasks = task_manager.all_tasks
    else:
        _tasks = new_tasks(df=df)

    values = []
    for tname in tqdm(_tasks):
        print(tname)
        task_config = task_manager._get_config(tname)
        if not task_config:
            continue
        # TODO: also catch benchmark configs like flan
        # Only configs naming a single task (a string) get a row; a missing
        # or non-string "task" entry is skipped instead of raising KeyError.
        if not isinstance(task_config.get("task"), str):
            continue
        # Configs that declare a "class" are skipped as well.
        if task_config.get("class", None):
            continue

        filters = ", ".join(
            str(f["name"])
            for f in task_config.get("filter_list", [{"name": "none"}])
        )
        # "metric_list" may be absent (or None) in a config; render an empty
        # cell rather than aborting the whole table with a KeyError.
        metrics = ", ".join(
            str(metric["metric"])
            for metric in task_config.get("metric_list") or []
        )
        v = [
            tname,
            task_config.get("group", None),
            task_config.get("output_type", "greedy_until"),
            filters,
            metrics,
        ]
        logger.info(v)
        values.append(v)

    if df is None:
        df = pd.DataFrame(values, columns=headers)
    else:
        # NOTE(review): assumes the existing table has exactly the columns in
        # `headers`, in that order — confirm against the stored CSV.
        for new_row in values:
            tname = new_row[0]
            if tname in df["Task Name"].values:
                # If task name exists, update the row
                df.loc[df["Task Name"] == tname] = new_row
            else:
                # If task name doesn't exist, append a new row
                series = pd.Series(new_row, index=df.columns)
                df = pd.concat([df, series.to_frame().T], ignore_index=True)
        df = df.sort_values(by=["Task Name"])
    table = df.to_markdown(index=False)
    return df, table


if __name__ == "__main__":
    # Outputs go to the "docs" directory next to this script's parent
    # directory.
    docs_dir = Path(__file__).parent.parent.resolve() / "docs"
    csv_file = docs_dir / "task_table.csv"
    md_file = docs_dir / "task_table.md"

    # NOTE(review): the previous version read the existing CSV and then
    # unconditionally reset `df = None`, so the table has always been rebuilt
    # from scratch and the read could only fail (e.g. on a malformed CSV).
    # The dead read is dropped here; to enable incremental updates, pass
    # `pd.read_csv(csv_file)` (falling back to None on FileNotFoundError)
    # once the stored CSV is known to match maketable's columns — confirm
    # intent with the author.
    df, table = maketable(df=None)

    md_file.write_text(table, encoding="utf-8")
    # Hand pandas the path so it manages encoding and newlines itself.
    df.to_csv(csv_file, index=False)