"""
Build a summary table of the lm_eval tasks.

Usage:
   Run this file as a script. It writes the task table as CSV and as Markdown
   to csv_file and md_file (both defined in the __main__ block below).
"""
import logging
from pathlib import Path
from typing import Optional, Tuple

import datasets
import pandas as pd

from lm_eval import tasks
from lm_eval.tasks import TASK_REGISTRY
from tests.utils import new_tasks


# INFO level is needed for the per-task rows that maketable() logs to show up.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Disable caching in the `datasets` library, then initialise lm_eval's tasks.
# NOTE(review): initialize_tasks() presumably populates the task registry that
# maketable() reads — confirm against lm_eval.tasks.
datasets.disable_caching()
tasks.initialize_tasks()


def check(tf: object) -> str:
    """Return the table cell marker for a boolean-like flag.

    Args:
        tf: Any value; only its truthiness is used.

    Returns:
        "✓" when ``tf`` is truthy, otherwise a single space.
    """
    return "✓" if tf else " "


def maketable(df: Optional[pd.DataFrame]) -> Tuple[pd.DataFrame, str]:
    """Build or refresh the task table and render it as Markdown.

    Args:
        df: A previously saved task table, or ``None`` when there is none.
            With ``None`` every entry of ``tasks.TASK_REGISTRY`` is listed,
            sorted by task name. Otherwise only the tasks named by
            ``new_tasks()`` are rebuilt and merged into ``df``: rows whose
            "Task Name" already exists are overwritten, the rest are appended,
            and the result is sorted by "Task Name".

    Returns:
        A ``(df, table)`` pair: the resulting DataFrame and its rendering via
        ``DataFrame.to_markdown(index=False)``.
    """
    # NOTE(review): the trailing comma in "Request Type," looks like a typo. It
    # is left untouched so the column name does not change between runs.
    headers = [
        "Task Name",
        "Group",
        "Train",
        "Val",
        "Test",
        "Val/Test Docs",
        "Request Type,",
        "Metrics",
    ]

    # `not df` raises ValueError for a DataFrame (its truth value is
    # ambiguous), so "no previous table" is tested with an identity check.
    build_from_scratch = df is None

    if build_from_scratch:
        _tasks = sorted(tasks.TASK_REGISTRY.items(), key=lambda item: item[0])
    else:
        # new_tasks() is defined outside this file; a falsy result (e.g. None)
        # is treated as "nothing to refresh" instead of failing on iteration.
        _tasks = [(name, TASK_REGISTRY.get(name)) for name in (new_tasks() or [])]

    values = []
    for tname, Task in _tasks:
        if Task is None:
            # .get() found no class for this name; calling None would raise.
            logger.warning("Task %r is not in TASK_REGISTRY; skipping it.", tname)
            continue
        task = Task()
        v = [
            tname,
            task.config.group,
            check(task.has_training_docs()),
            check(task.has_validation_docs()),
            check(task.has_test_docs()),
            _count_eval_docs(task),
            task.config.output_type,
            ", ".join(task.aggregation().keys()),
        ]
        logger.info(v)
        values.append(v)

    if build_from_scratch:
        df = pd.DataFrame(values, columns=headers)
    else:
        for new_row in values:
            matches = df["Task Name"] == new_row[0]
            if matches.any():
                # If task name exists, update the row
                df.loc[matches] = new_row
            else:
                # If task name doesn't exist, append a new row
                series = pd.Series(new_row, index=df.columns)
                df = pd.concat([df, series.to_frame().T], ignore_index=True)
        df = df.sort_values(by=["Task Name"])

    return df, df.to_markdown(index=False)


def _count_eval_docs(task) -> int:
    """Count the docs of the first available split: test, validation, then training."""
    if task.has_test_docs():
        docs = task.test_docs()
    elif task.has_validation_docs():
        docs = task.validation_docs()
    else:
        docs = task.training_docs()
    # Count while iterating so the documents are not all held in a list.
    return sum(1 for _ in docs)


if __name__ == "__main__":
    # Both outputs go to the docs/ directory two levels above this file.
    docs_dir = Path(__file__).parent.parent.resolve() / "docs"
    csv_file = docs_dir / "task_guide.csv"
    md_file = docs_dir / "task_guide.md"

    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        # No earlier table on disk; maketable() receives None.
        df = None

    df, table = maketable(df=df)

    # The table contains "✓", so write UTF-8 explicitly instead of relying on
    # the platform's default encoding.
    md_file.write_text(table, encoding="utf-8")
    # Passing the path lets pandas open the file itself with the newline
    # handling it expects for CSV output.
    df.to_csv(csv_file, index=False, encoding="utf-8")