"""
Usage:
   Writes the task table as CSV and Markdown to csv_file and md_file
   (both defined at the bottom of this file).
"""
import logging
baberabb's avatar
test  
baberabb committed
6
import os
baberabb's avatar
baberabb committed
7
from pathlib import Path
baberabb's avatar
test  
baberabb committed
8
from typing import List, Union
9

baberabb's avatar
baberabb committed
10
11
import datasets
import pandas as pd
12
from tqdm import tqdm
Leo Gao's avatar
Leo Gao committed
13

14
from lm_eval import tasks
baberabb's avatar
test  
baberabb committed
15
from lm_eval.utils import load_yaml_config
16

Leo Gao's avatar
Leo Gao committed
17

jon-tow's avatar
jon-tow committed
18
19
# Emit INFO-level records; each generated table row is logged via `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Turn off caching in the `datasets` library for this run.
datasets.disable_caching()
# Shared task registry, queried by new_tasks() and maketable().
task_manager = tasks.TaskManager()
Leo Gao's avatar
Leo Gao committed
22

Fabrizio Milo's avatar
Fabrizio Milo committed
23

baberabb's avatar
test  
baberabb committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def load_changed_files(file_path: str) -> List[str]:
    """Return the whitespace-separated entries stored in ``file_path``.

    Args:
        file_path: Path of a text file; its content is split on any run of
            whitespace (spaces, tabs, newlines).

    Returns:
        The entries in file order. An empty or whitespace-only file yields
        an empty list.
    """
    with open(file_path, "r") as f:
        # str.split() already returns a fresh list, so no extra copy is needed.
        return f.read().split()


def parser(full_path: List[str]) -> List[str]:
    """Map changed file paths to the task names their YAML configs declare.

    A ``.yaml`` path is loaded directly. A ``.py`` path stands for every
    ``*.yaml`` file in the same directory. Any other path is ignored.

    Args:
        full_path: File paths to inspect.

    Returns:
        The distinct task names, sorted so the result is deterministic.
    """
    task_names = set()
    for changed in full_path:
        if changed.endswith(".yaml"):
            yaml_paths = [changed]
        elif changed.endswith(".py"):
            yaml_paths = [str(p) for p in Path(changed).parent.glob("*.yaml")]
        else:
            continue
        for yaml_path in yaml_paths:
            try:
                task = load_yaml_config(yaml_path)["task"]
            except KeyError:
                # Config declares no "task" entry; nothing to report.
                continue
            # Same guard maketable() applies: only plain string task names
            # are tabulated (a non-string value is also not hashable here).
            if isinstance(task, str):
                task_names.add(task)
    return sorted(task_names)


42
43
def new_tasks(df=None) -> List[str]:
    """Collect the names of tasks whose table rows need to be (re)generated.

    Two sources are combined:

    * When the changed-files list written by CI exists, the task names
      declared by the YAML configs it points at (see ``parser``).
    * When ``df`` is given, every task known to ``task_manager`` that is not
      yet listed in its "Task Name" column.

    Args:
        df: Existing (possibly partial) task table, or None.

    Returns:
        Task names without duplicates, in first-seen order. The list is
        empty when neither source applies.
    """
    FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
    found = []
    if os.path.exists(FILENAME):
        # If tasks folder has changed then we get the list of files from
        # FILENAME and parse the yaml files to get the task names
        # (for when run in CI).
        found.extend(parser(load_changed_files(FILENAME)))
    # If we already have a (partial) task table created, only add tasks
    # which aren't already in the task table.
    if df is not None:
        # Build the lookup once instead of scanning the column per task.
        known = set(df["Task Name"].values)
        found.extend(k for k in task_manager.all_tasks if k not in known)
    # A task can come from both sources; keep the first occurrence only.
    return list(dict.fromkeys(found))
baberabb's avatar
test  
baberabb committed
59
60


jon-tow's avatar
jon-tow committed
61
def check(tf) -> str:
    """Return a check mark when ``tf`` is truthy, otherwise a single space."""
    return "✓" if tf else " "

Leo Gao's avatar
Leo Gao committed
67

baberabb's avatar
baberabb committed
68
69
def maketable(df):
    """Build the task table, or extend an existing one, and render Markdown.

    With ``df`` set to None every task known to ``task_manager`` is
    tabulated. Otherwise only the tasks returned by ``new_tasks(df=df)`` are
    processed and merged into ``df``: a row whose task name is already
    present is overwritten, any other row is appended, and the merged table
    is sorted by "Task Name".

    A task is skipped when it has no config, when its config's "task" entry
    is not a string, or when its config sets "class".

    Args:
        df: Existing task table with the columns listed in ``headers``, or
            None to build the table from scratch.

    Returns:
        A ``(df, table)`` tuple: the resulting DataFrame and its Markdown
        rendering (without the index column).
    """
    headers = [
        # For now, we restrict to presenting data
        # that can be collected statically.
        "Task Name",
        "Group",
        # "Train",
        # "Val",
        # "Test",
        # "Val/Test Docs",
        "Request Type",
        "Filters",
        "Metrics",
    ]
    if df is None:
        _tasks = task_manager.all_tasks
    else:
        _tasks = new_tasks(df=df)

    values = []
    for tname in tqdm(_tasks):
        task_config = task_manager._get_config(tname)
        if not task_config:
            continue
        # TODO: also catch benchmark configs like flan
        if not isinstance(task_config["task"], str):
            continue
        if task_config.get("class", None):
            continue
        v = [
            tname,
            task_config.get("group", None),
            task_config.get("output_type", "greedy_until"),
            ", ".join(
                str(f["name"])
                for f in task_config.get("filter_list", [{"name": "none"}])
            ),
            # Read with a default, like filter_list above, so a config
            # without "metric_list" gives an empty cell, not a KeyError.
            ", ".join(
                str(metric["metric"])
                for metric in task_config.get("metric_list", [])
            ),
        ]
        logger.info(v)
        values.append(v)

    if df is None:
        df = pd.DataFrame(values, columns=headers)
    else:
        for new_row in values:
            tname = new_row[0]
            if tname in df["Task Name"].values:
                # If task name exists, update the row
                df.loc[df["Task Name"] == tname] = new_row
            else:
                # If task name doesn't exist, append a new row
                series = pd.Series(new_row, index=df.columns)
                df = pd.concat([df, series.to_frame().T], ignore_index=True)
        df = df.sort_values(by=["Task Name"])
    table = df.to_markdown(index=False)
    return df, table


if __name__ == "__main__":
    # Resolve first: for a relative __file__ (e.g. "make_table_tasks.py"),
    # .parent.parent collapses to "." and would resolve to the current
    # directory instead of the directory above this script's folder.
    docs_dir = Path(__file__).resolve().parent.parent / "docs"
    csv_file = docs_dir / "task_table.csv"
    md_file = docs_dir / "task_table.md"

    try:
        # keep_default_na=False keeps empty cells as "" on reload, so they
        # do not come back as NaN and render as "nan" in the Markdown table.
        df = pd.read_csv(csv_file, keep_default_na=False)
    except FileNotFoundError:
        # No table yet: build it from scratch.
        df = None
    # NOTE(review): an unconditional `df = None` used to follow here. It
    # discarded the CSV just loaded, so the incremental path in maketable()
    # and new_tasks() was never reached. Confirm this was not a deliberate
    # way to force a full rebuild.
    df, table = maketable(df=df)

    md_file.write_text(table, encoding="utf-8")
    # Pass the path, not a text-mode handle, so pandas controls the encoding
    # and line endings.
    df.to_csv(csv_file, index=False)