"configs/_base_/datasets/s3dis_seg-3d-13class.py" did not exist on "9cb75e7ddadf447196d761e97a4bfc26502a5728"
ls.py 3.32 KB
Newer Older
Baber's avatar
Baber committed
1
2
3
4
5
6
import argparse
import textwrap

from lm_eval._cli.subcommand import SubCommand


Baber's avatar
Baber committed
7
class List(SubCommand):
    """Command for listing available tasks.

    Registers the ``ls`` subcommand on the supplied subparsers object and
    prints the task listing selected by the positional ``what`` argument.
    """

    # Keyword arguments forwarded to ``TaskManager.list_all_tasks`` for each
    # accepted value of ``what``. The key order is also the order in which
    # the choices are offered on the command line.
    _LISTING_KWARGS = {
        "tasks": {},
        "groups": {"list_subtasks": False, "list_tags": False},
        "subtasks": {"list_groups": False, "list_tags": False},
        "tags": {"list_groups": False, "list_subtasks": False},
    }

    def __init__(self, subparsers: argparse._SubParsersAction, *args, **kwargs):
        """Create the ``ls`` parser and wire it to :meth:`_execute`.

        Args:
            subparsers: The object returned by ``ArgumentParser.add_subparsers``
                on which the ``ls`` subcommand is registered.
            *args: Forwarded unchanged to ``SubCommand.__init__``.
            **kwargs: Forwarded unchanged to ``SubCommand.__init__``.
        """
        super().__init__(*args, **kwargs)
        self._parser = subparsers.add_parser(
            "ls",
            help="List available tasks, groups, subtasks, or tags",
            description="List available tasks, groups, subtasks, or tags from the evaluation harness.",
            # The usage line must name the subcommand exactly as registered
            # above ("ls"), otherwise the help text advertises a command that
            # the parser rejects.
            usage="lm-eval ls [tasks|groups|subtasks|tags] [--include_path DIR]",
            epilog=textwrap.dedent("""
                examples:
                  # List all available tasks (includes groups, subtasks, and tags)
                  $ lm-eval ls tasks

                  # List only task groups (like 'mmlu', 'glue', 'superglue')
                  $ lm-eval ls groups

                  # List only individual subtasks (like 'mmlu_abstract_algebra')
                  $ lm-eval ls subtasks

                  # Include external task definitions
                  $ lm-eval ls tasks --include_path /path/to/external/tasks

                  # List tasks from multiple external paths
                  $ lm-eval ls tasks --include_path "/path/to/tasks1:/path/to/tasks2"

                organization:
                  • Groups: Collections of tasks with aggregated metric across subtasks (e.g., 'mmlu')
                  • Subtasks: Individual evaluation tasks (e.g., 'mmlu_anatomy', 'hellaswag')
                  • Tags: Similar to groups but no aggregate metric (e.g., 'reasoning', 'knowledge', 'language')
                  • External Tasks: Custom tasks defined in external directories

                evaluation usage:
                  After listing tasks, use them with the run command!

                For more information tasks configs are defined in https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks
            """),
            # Keep the hand-formatted epilog above as written instead of
            # letting argparse re-wrap it.
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        self._add_args()
        # The CLI entry point is expected to call ``args.func(args)``.
        self._parser.set_defaults(func=self._execute)

    def _add_args(self) -> None:
        """Declare the arguments accepted by the ``ls`` subcommand."""
        self._parser.add_argument(
            "what",
            choices=list(self._LISTING_KWARGS),
            # Optional positional: when omitted, ``args.what`` is None and
            # ``_execute`` prints the help text.
            nargs="?",
            help="What to list: tasks (all), groups, subtasks, or tags",
        )
        self._parser.add_argument(
            "--include_path",
            type=str,
            default=None,
            metavar="DIR",
            help="Additional path to include if there are external tasks.",
        )

    def _execute(self, args: argparse.Namespace) -> None:
        """Execute the list command.

        Prints the listing selected by ``args.what``, or the subcommand help
        when no selection was given.

        Args:
            args: Parsed command-line arguments; reads ``what`` and
                ``include_path``.
        """
        if args.what is None:
            # Nothing to list: show help without importing or constructing
            # the task manager.
            self._parser.print_help()
            return

        listing_kwargs = self._LISTING_KWARGS.get(args.what)
        if listing_kwargs is None:
            # Not reachable through the CLI because argparse restricts
            # ``what`` to the known choices; stay silent for other callers.
            return

        # Imported lazily so that building the parser does not import
        # ``lm_eval.tasks``.
        from lm_eval.tasks import TaskManager

        # NOTE(review): the help epilog shows colon-separated paths for
        # --include_path, but the value is forwarded here unsplit -- confirm
        # that TaskManager accepts that form.
        task_manager = TaskManager(include_path=args.include_path)
        print(task_manager.list_all_tasks(**listing_kwargs))