ls.py 3.32 KB
Newer Older
Baber's avatar
Baber committed
1
import argparse
Baber's avatar
cleanup  
Baber committed
2
import textwrap
Baber's avatar
Baber committed
3

Baber's avatar
cleanup  
Baber committed
4
from lm_eval._cli.subcommand import SubCommand
Baber's avatar
Baber committed
5
6


Baber's avatar
nit  
Baber committed
7
class List(SubCommand):
    """Command for listing available tasks.

    Registers the ``ls`` subcommand on the supplied subparsers object and
    prints the tasks, groups, subtasks, or tags reported by ``TaskManager``.
    """

    # Keyword arguments passed to ``TaskManager.list_all_tasks`` for each
    # accepted value of the positional ``what`` argument. An empty mapping
    # means "call it with its defaults".
    _LISTING_FILTERS = {
        "tasks": {},
        "groups": {"list_subtasks": False, "list_tags": False},
        "subtasks": {"list_groups": False, "list_tags": False},
        "tags": {"list_groups": False, "list_subtasks": False},
    }

    def __init__(self, subparsers: argparse._SubParsersAction, *args, **kwargs):
        """Create the ``ls`` parser and attach its arguments and handler.

        Args:
            subparsers: The object returned by ``ArgumentParser.add_subparsers``
                on which the ``ls`` parser is registered.
            *args: Forwarded unchanged to ``SubCommand.__init__``.
            **kwargs: Forwarded unchanged to ``SubCommand.__init__``.
        """
        super().__init__(*args, **kwargs)
        self._parser = subparsers.add_parser(
            "ls",
            help="List available tasks, groups, subtasks, or tags",
            description="List available tasks, groups, subtasks, or tags from the evaluation harness.",
            # The usage line must name the subcommand exactly as registered
            # above ("ls"), matching the examples in the epilog.
            usage="lm-eval ls [tasks|groups|subtasks|tags] [--include_path DIR]",
            epilog=textwrap.dedent("""
                examples:
                  # List all available tasks (includes groups, subtasks, and tags)
                  $ lm-eval ls tasks

                  # List only task groups (like 'mmlu', 'glue', 'superglue')
                  $ lm-eval ls groups

                  # List only individual subtasks (like 'mmlu_abstract_algebra')
                  $ lm-eval ls subtasks

                  # Include external task definitions
                  $ lm-eval ls tasks --include_path /path/to/external/tasks

                  # List tasks from multiple external paths
                  $ lm-eval ls tasks --include_path "/path/to/tasks1:/path/to/tasks2"

                organization:
                  • Groups: Collections of tasks with aggregated metric across subtasks (e.g., 'mmlu')
                  • Subtasks: Individual evaluation tasks (e.g., 'mmlu_anatomy', 'hellaswag')
                  • Tags: Similar to groups but no aggregate metric (e.g., 'reasoning', 'knowledge', 'language')
                  • External Tasks: Custom tasks defined in external directories

                evaluation usage:
                  After listing tasks, use them with the run command!

                For more information tasks configs are defined in https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks
            """),
            # Keep the hand-formatted epilog verbatim instead of re-wrapping it.
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        self._add_args()
        # The handler is stored on the parsed namespace as ``func``.
        self._parser.set_defaults(func=self._execute)

    def _add_args(self) -> None:
        """Register the positional ``what`` and the ``--include_path`` option."""
        self._parser.add_argument(
            "what",
            choices=["tasks", "groups", "subtasks", "tags"],
            # Optional positional: when omitted, ``args.what`` is None and
            # ``_execute`` prints the help text instead of listing anything.
            nargs="?",
            help="What to list: tasks (all), groups, subtasks, or tags",
        )
        self._parser.add_argument(
            "--include_path",
            type=str,
            default=None,
            metavar="DIR",
            help="Additional path to include if there are external tasks.",
        )

    def _execute(self, args: argparse.Namespace) -> None:
        """Execute the list command.

        Args:
            args: Parsed command-line namespace. Reads ``args.what`` (one of
                the declared choices, or None when omitted) and
                ``args.include_path``.
        """
        # Handle the help case first so a bare ``lm-eval ls`` does not import
        # and construct a TaskManager it will never use.
        if args.what is None:
            self._parser.print_help()
            return

        filters = self._LISTING_FILTERS.get(args.what)
        if filters is None:
            # argparse's ``choices`` should make this unreachable; a value
            # outside the table prints nothing, as the if/elif chain did.
            return

        # Imported lazily, at the point of use, as in the original code.
        from lm_eval.tasks import TaskManager

        task_manager = TaskManager(include_path=args.include_path)
        print(task_manager.list_all_tasks(**filters))