"""
Usage:
   Writes the task table as CSV and Markdown to csv_file and md_file (defined below).
"""
import logging
baberabb's avatar
baberabb committed
6
from pathlib import Path
7

baberabb's avatar
baberabb committed
8
9
import datasets
import pandas as pd
Leo Gao's avatar
Leo Gao committed
10

11
from lm_eval import tasks
baberabb's avatar
baberabb committed
12
13
from lm_eval.tasks import TASK_REGISTRY
from tests.utils import new_tasks
14

Leo Gao's avatar
Leo Gao committed
15

jon-tow's avatar
jon-tow committed
16
17
# INFO level is required for the per-task rows that maketable() logs to be shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): presumably avoids writing dataset cache files while every task's
# docs are loaded just to be counted -- confirm against the `datasets` docs.
datasets.disable_caching()
# NOTE(review): presumably populates the task registry that maketable() reads;
# it must therefore run before maketable() is called -- confirm.
tasks.initialize_tasks()
Leo Gao's avatar
Leo Gao committed
20

Fabrizio Milo's avatar
Fabrizio Milo committed
21

jon-tow's avatar
jon-tow committed
22
def check(tf):
    """Return the table cell marker for a flag.

    Args:
        tf: Any value; only its truthiness is inspected.

    Returns:
        "✓" when ``tf`` is truthy, otherwise a single space so the cell
        renders as blank.
    """
    return "✓" if tf else " "

Leo Gao's avatar
Leo Gao committed
28

baberabb's avatar
baberabb committed
29
30
31
32
33
34
35
36
37
38
39
def _count_eval_docs(task):
    """Count the docs of the first available split: test, else validation, else training."""
    if task.has_test_docs():
        docs = task.test_docs()
    elif task.has_validation_docs():
        docs = task.validation_docs()
    else:
        docs = task.training_docs()
    # Count lazily instead of materialising the whole split in a list.
    return sum(1 for _ in docs)


def maketable(df, limit=10):
    """Build the task table, optionally merging into a previously saved one.

    Args:
        df: The existing table as a ``pandas.DataFrame``, or ``None``.
            With ``None`` the table is built from the entries of
            ``tasks.TASK_REGISTRY`` sorted by task name. With a DataFrame,
            only the tasks returned by ``new_tasks()`` are computed; each one
            overwrites the row with the same "Task Name" or is appended.
        limit: Maximum number of tasks processed in one call. Defaults to 10,
            the value that used to be hard-coded; pass ``None`` to process
            every task.
            NOTE(review): the cap looks like a debugging leftover -- confirm
            whether the default should become ``None``.

    Returns:
        A ``(df, table)`` tuple: the resulting DataFrame and its Markdown
        rendering (``df.to_markdown(index=False)``).
    """
    headers = [
        "Task Name",
        "Group",
        "Train",
        "Val",
        "Test",
        "Val/Test Docs",
        # NOTE(review): the trailing comma inside this header looks like a typo;
        # left unchanged because existing CSV files may carry this column name.
        "Request Type,",
        "Metrics",
    ]

    # `not df` raises ValueError for a DataFrame (ambiguous truth value), so
    # the "no previous table" case must be detected with an identity check.
    if df is None:
        _tasks = sorted(tasks.TASK_REGISTRY.items(), key=lambda x: x[0])
    else:
        # `or []` tolerates new_tasks() reporting "nothing new" as None.
        task_names = new_tasks() or []
        _tasks = [(x, TASK_REGISTRY.get(x)) for x in task_names]

    values = []
    for tname, Task in _tasks:
        if limit is not None and len(values) >= limit:
            break
        if Task is None:
            # .get() found no registry entry; calling None() would crash.
            logger.warning("Task %r is not in TASK_REGISTRY; skipping", tname)
            continue
        task = Task()
        v = [
            tname,
            task.config.group,
            check(task.has_training_docs()),
            check(task.has_validation_docs()),
            check(task.has_test_docs()),
            _count_eval_docs(task),
            task.config.output_type,
            ", ".join(task.aggregation().keys()),
        ]
        logger.info(v)
        values.append(v)

    if df is None:
        df = pd.DataFrame(values, columns=headers)
    else:
        for new_row in values:
            tname = new_row[0]
            if tname in df["Task Name"].values:
                # If task name exists, update the row
                df.loc[df["Task Name"] == tname] = new_row
            else:
                # If task name doesn't exist, append a new row
                series = pd.Series(new_row, index=df.columns)
                df = pd.concat([df, series.to_frame().T], ignore_index=True)
        df = df.sort_values(by=["Task Name"])

    table = df.to_markdown(index=False)
    return df, table


if __name__ == "__main__":
    # Both outputs go to the "docs" directory that sits beside this script's
    # parent directory.
    docs_dir = Path(__file__).parent.parent.resolve() / "docs"
    csv_file = docs_dir / "task_table.csv"
    md_file = docs_dir / "task_table.md"

    # A missing CSV means there is no previous table to merge into.
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        df = None

    df, table = maketable(df=df)

    # The table contains "✓", so write UTF-8 explicitly instead of relying on
    # the locale's default encoding; this also matches read_csv's default.
    md_file.write_text(table, encoding="utf-8")
    # Passing the path lets pandas open the file itself with correct newline
    # handling, which a plain text-mode handle does not guarantee.
    df.to_csv(csv_file, index=False, encoding="utf-8")