make_table_tasks.py 2.73 KB
Newer Older
jon-tow's avatar
jon-tow committed
1
2
"""
Usage:
   Run this file as a script. It writes a CSV and a Markdown table of tasks
   to csv_file and md_file (both defined in the ``__main__`` block below).
"""
import logging
baberabb's avatar
baberabb committed
6
from pathlib import Path
7

baberabb's avatar
baberabb committed
8
9
import datasets
import pandas as pd
Leo Gao's avatar
Leo Gao committed
10

11
from lm_eval import tasks
baberabb's avatar
baberabb committed
12
from lm_eval.tasks import TASK_REGISTRY
baberabb's avatar
test  
baberabb committed
13
14

from ..tests.utils import new_tasks
15

Leo Gao's avatar
Leo Gao committed
16

jon-tow's avatar
jon-tow committed
17
18
# Configure the root logger at INFO so the per-task rows logged by
# maketable() are actually emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level setup: both calls run at import time, before any task object
# is created further down.
# NOTE(review): presumably caching is disabled so that instantiating every
# task does not leave cached dataset artifacts behind — confirm.
datasets.disable_caching()
tasks.initialize_tasks()
Leo Gao's avatar
Leo Gao committed
21

Fabrizio Milo's avatar
Fabrizio Milo committed
22

jon-tow's avatar
jon-tow committed
23
def check(tf):
    """Return the table cell marker for a boolean-like flag.

    Args:
        tf: Any value; only its truthiness is used.

    Returns:
        ``"✓"`` when ``tf`` is truthy, otherwise a single space so the cell
        stays visually empty in the rendered table.
    """
    return "✓" if tf else " "

Leo Gao's avatar
Leo Gao committed
29

baberabb's avatar
baberabb committed
30
31
32
33
34
35
36
37
38
39
40
def maketable(df, max_tasks=10):
    """Build (or update) the task overview table.

    Args:
        df: ``None`` to build a fresh table from every entry in
            ``tasks.TASK_REGISTRY`` (sorted by task name), or an existing
            table as a ``pandas.DataFrame`` whose rows for the task names
            returned by ``new_tasks()`` are updated or appended.
        max_tasks: Maximum number of candidate tasks to process. Defaults to
            10, which matches the previously hard-coded limit; pass ``None``
            to process all of them.

    Returns:
        A ``(df, table)`` tuple: the resulting DataFrame and the same data
        rendered as a Markdown table string (without the index column).
    """
    headers = [
        "Task Name",
        "Group",
        "Train",
        "Val",
        "Test",
        "Val/Test Docs",
        # NOTE(review): the trailing comma inside this header looks like a
        # typo, but it is kept as-is because it is the column name written
        # to the CSV/Markdown output — confirm before changing.
        "Request Type,",
        "Metrics",
    ]

    # `df is None` rather than `not df`: the truth value of a DataFrame is
    # ambiguous and raises ValueError, which made the update path unusable.
    build_new = df is None

    if build_new:
        _tasks = sorted(tasks.TASK_REGISTRY.items(), key=lambda x: x[0])
    else:
        _tasks = [(x, TASK_REGISTRY.get(x)) for x in new_tasks()]

    values = []
    # Slicing with None keeps the whole list, so max_tasks=None means no limit.
    for tname, Task in _tasks[:max_tasks]:
        if Task is None:
            # .get() yields None for names missing from the registry; skip
            # them instead of failing with "'NoneType' is not callable".
            logger.warning("Task %r is not in TASK_REGISTRY; skipping.", tname)
            continue
        v = _task_row(tname, Task())
        logger.info(v)
        values.append(v)

    if build_new:
        df = pd.DataFrame(values, columns=headers)
    else:
        for new_row in values:
            tname = new_row[0]
            if tname in df["Task Name"].values:
                # If task name exists, update the row
                df.loc[df["Task Name"] == tname] = new_row
            else:
                # If task name doesn't exist, append a new row
                series = pd.Series(new_row, index=df.columns)
                df = pd.concat([df, series.to_frame().T], ignore_index=True)
        df = df.sort_values(by=["Task Name"])

    table = df.to_markdown(index=False)
    return df, table


def _task_row(tname, task):
    """Return one table row for ``task``, ordered like ``maketable``'s headers."""
    # Count documents in the first split the task provides, preferring
    # test, then validation, and falling back to training.
    if task.has_test_docs():
        docs = task.test_docs()
    elif task.has_validation_docs():
        docs = task.validation_docs()
    else:
        docs = task.training_docs()

    return [
        tname,
        task.config.group,
        check(task.has_training_docs()),
        check(task.has_validation_docs()),
        check(task.has_test_docs()),
        len(list(docs)),
        task.config.output_type,
        ", ".join(task.aggregation().keys()),
    ]


if __name__ == "__main__":
    # Both outputs live in the "docs" directory that sits next to this
    # script's parent directory.
    docs_dir = Path(__file__).parent.parent.resolve() / "docs"
    csv_file = docs_dir / "task_table.csv"
    md_file = docs_dir / "task_table.md"

    # Reuse the previous table when it exists so only changed tasks are
    # refreshed; otherwise maketable() builds the table from scratch.
    try:
        df = pd.read_csv(csv_file)
    except FileNotFoundError:
        df = None

    df, table = maketable(df=df)

    # The table contains "✓", which the platform default encoding may not
    # be able to represent, so write UTF-8 explicitly.
    with open(md_file, "w", encoding="utf-8") as f:
        f.write(table)
    # Passing the path lets pandas handle encoding (UTF-8 by default) and
    # newlines itself, consistent with the read_csv() call above.
    df.to_csv(csv_file, index=False)