import argparse
import textwrap

from lm_eval._cli.base import SubCommand


class List(SubCommand):
    """Sub-command ``list``: print the tasks, groups, subtasks, or tags known to the harness.

    The parser is registered on the ``subparsers`` action passed to the
    constructor. Its ``func`` default is bound to :meth:`execute`, so a
    dispatcher that calls ``args.func(args)`` runs the listing (or prints the
    help text when no selector was given).
    """

    # Maps each accepted ``what`` selector to the keyword arguments forwarded to
    # ``TaskManager.list_all_tasks``. An empty mapping means "no filtering".
    # The key order is also the order shown in the argparse ``choices``.
    _LIST_FILTERS = {
        "tasks": {},
        "groups": {"list_subtasks": False, "list_tags": False},
        "subtasks": {"list_groups": False, "list_tags": False},
        "tags": {"list_groups": False, "list_subtasks": False},
    }

    def __init__(self, subparsers: argparse._SubParsersAction, *args, **kwargs):
        """Create the ``list`` parser and attach its arguments.

        Args:
            subparsers: The sub-parsers action the ``list`` parser is added to.
            *args: Forwarded unchanged to ``SubCommand.__init__``.
            **kwargs: Forwarded unchanged to ``SubCommand.__init__``.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): the epilog advertises colon-separated include paths, but
        # execute() forwards --include_path verbatim to TaskManager; confirm
        # that TaskManager actually splits the value.
        self._parser = subparsers.add_parser(
            "list",
            help="List available tasks, groups, subtasks, or tags",
            description="List available tasks, groups, subtasks, or tags from the evaluation harness.",
            usage="lm-eval list [tasks|groups|subtasks|tags] [--include_path DIR]",
            epilog=textwrap.dedent("""
                examples:
                  # List all available tasks (includes groups, subtasks, and tags)
                  $ lm-eval list tasks

                  # List only task groups (like 'mmlu', 'glue', 'superglue')
                  $ lm-eval list groups

                  # List only individual subtasks (like 'mmlu_abstract_algebra')
                  $ lm-eval list subtasks

                  # Include external task definitions
                  $ lm-eval list tasks --include_path /path/to/external/tasks

                  # List tasks from multiple external paths
                  $ lm-eval list tasks --include_path "/path/to/tasks1:/path/to/tasks2"

                organization:
                  • Groups: Collections of tasks with aggregated metric across subtasks (e.g., 'mmlu')
                  • Subtasks: Individual evaluation tasks (e.g., 'mmlu_anatomy', 'hellaswag')
                  • Tags: Similar to groups but no aggregate metric (e.g., 'reasoning', 'knowledge', 'language')
                  • External Tasks: Custom tasks defined in external directories

                evaluation usage:
                  After listing tasks, use them with the run command!

                For more information tasks configs are defined in https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks
            """),
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        self._add_args()
        # Dispatch to execute(); it prints the help itself when no selector is
        # given. The previous default always printed help, so the listing
        # branches in execute() were unreachable through ``args.func``.
        self._parser.set_defaults(func=self.execute)

    def _add_args(self) -> None:
        """Register the optional ``what`` positional and the ``--include_path`` option."""
        self._parser.add_argument(
            "what",
            choices=list(self._LIST_FILTERS),
            nargs="?",
            help="What to list: tasks (all), groups, subtasks, or tags",
        )
        self._parser.add_argument(
            "--include_path",
            type=str,
            default=None,
            metavar="DIR",
            help="Additional path to include if there are external tasks.",
        )

    def execute(self, args: argparse.Namespace) -> None:
        """Print the requested listing, or the help text when ``args.what`` is None.

        Args:
            args: Parsed arguments; reads ``args.what`` and ``args.include_path``.
        """
        if args.what is None:
            # Nothing was selected: show usage without building a TaskManager.
            self._parser.print_help()
            return

        filters = self._LIST_FILTERS.get(args.what)
        if filters is None:
            # Selector outside the known set (argparse ``choices`` normally
            # rejects these first); print nothing, as before.
            return

        # Imported lazily so building the parser does not import the task registry.
        from lm_eval.tasks import TaskManager

        task_manager = TaskManager(include_path=args.include_path)
        print(task_manager.list_all_tasks(**filters))