list.py 3.39 KB
Newer Older
Baber's avatar
Baber committed
1
import argparse
Baber's avatar
cleanup  
Baber committed
2
import textwrap
Baber's avatar
Baber committed
3
4
5
6
7
8
9
10
11
12

from lm_eval._cli.base import SubCommand


class ListCommand(SubCommand):
    """Command for listing available tasks.

    Registers a ``list`` sub-parser that takes an optional positional
    ``what`` (one of ``tasks``, ``groups``, ``subtasks``, ``tags``) and an
    optional ``--include_path``, and prints the matching listing obtained
    from ``lm_eval.tasks.TaskManager``.
    """

    # Maps each accepted value of the positional ``what`` argument to the
    # keyword arguments forwarded to ``TaskManager.list_all_tasks``.
    # "tasks" forwards no filters; every other entry switches off the two
    # categories that were not asked for.
    _LIST_KWARGS = {
        "tasks": {},
        "groups": {"list_subtasks": False, "list_tags": False},
        "subtasks": {"list_groups": False, "list_tags": False},
        "tags": {"list_groups": False, "list_subtasks": False},
    }

    def __init__(self, subparsers: argparse._SubParsersAction, *args, **kwargs):
        """Create the ``list`` sub-parser and register its arguments.

        Args:
            subparsers: The sub-parsers action (as returned by
                ``ArgumentParser.add_subparsers``) to attach ``list`` to.
            *args: Forwarded unchanged to ``SubCommand.__init__``.
            **kwargs: Forwarded unchanged to ``SubCommand.__init__``.
        """
        # Create and configure the parser
        super().__init__(*args, **kwargs)
        self._parser = subparsers.add_parser(
            "list",
            help="List available tasks, groups, subtasks, or tags",
            description="List available tasks, groups, subtasks, or tags from the evaluation harness.",
            epilog=textwrap.dedent("""
                examples:
                  # List all available tasks (includes groups, subtasks, and tags)
                  $ lm-eval list tasks
                  
                  # List only task groups (like 'mmlu', 'glue', 'superglue')
                  $ lm-eval list groups
                  
                  # List only individual subtasks (like 'mmlu_abstract_algebra')
                  $ lm-eval list subtasks
                  
                  # Include external task definitions
                  $ lm-eval list tasks --include_path /path/to/external/tasks
                  
                  # List tasks from multiple external paths
                  $ lm-eval list tasks --include_path "/path/to/tasks1:/path/to/tasks2"
                
                organization:
                  • Groups: Collections of tasks with aggregated metric across subtasks (e.g., 'mmlu')
                  • Subtasks: Individual evaluation tasks (e.g., 'mmlu_anatomy', 'hellaswag')
                  • Tags: Similar to groups but no aggregate metric (e.g., 'reasoning', 'knowledge', 'language')
                  • External Tasks: Custom tasks defined in external directories
                
                evaluation usage:
                  After listing tasks, use them with the run command!
                  
                For more information tasks configs are defined in https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks
            """),
            # Raw formatter keeps the hand-laid-out epilog from being re-wrapped.
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        self._add_args()
        # NOTE(review): the default ``func`` only prints this sub-command's
        # help. Presumably the dispatcher calls ``execute`` directly; if it
        # dispatches through ``args.func`` instead, ``execute`` would never
        # run -- confirm against the CLI entry point.
        self._parser.set_defaults(func=lambda arg: self._parser.print_help())

    def _add_args(self) -> None:
        """Register the ``what`` positional and the ``--include_path`` option."""
        self._parser.add_argument(
            "what",
            # Derived from the dispatch table so the accepted values and the
            # handled values cannot drift apart (dict order is preserved).
            choices=list(self._LIST_KWARGS),
            # Optional positional: when omitted, ``args.what`` is None.
            nargs="?",
            help="What to list: tasks (all), groups, subtasks, or tags",
        )
        self._parser.add_argument(
            "--include_path",
            type=str,
            default=None,
            metavar="DIR",
            help="Additional path to include if there are external tasks.",
        )

    def execute(self, args: argparse.Namespace) -> None:
        """Execute the list command.

        Prints the sub-command help when ``args.what`` is None; otherwise
        prints the result of ``TaskManager.list_all_tasks`` with the filters
        that correspond to ``args.what``.

        Args:
            args: Parsed namespace; ``what`` and ``include_path`` are read.
        """
        what = args.what
        if what is None:
            # Nothing to list: show usage without paying for the
            # ``lm_eval.tasks`` import or a TaskManager that would go unused.
            self._parser.print_help()
            return

        list_kwargs = self._LIST_KWARGS.get(what)
        if list_kwargs is None:
            # argparse ``choices`` rejects other values on the command line;
            # a hand-built namespace with an unknown value prints nothing.
            return

        # Imported lazily so that building the parser does not import the
        # task package.
        from lm_eval.tasks import TaskManager

        task_manager = TaskManager(include_path=args.include_path)
        print(task_manager.list_all_tasks(**list_kwargs))