parser.py 6.41 KB
Newer Older
Baber's avatar
Baber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import argparse
import sys
from typing import Dict, Type

from lm_eval._cli.base import SubCommand
from lm_eval._cli.cache import CacheCommand
from lm_eval._cli.evaluate import EvaluateCommand
from lm_eval._cli.list import ListCommand
from lm_eval._cli.validate import ValidateCommand


def check_argument_types(parser: argparse.ArgumentParser):
    """
    Verify that every value-taking CLI argument declares an explicit ``type``.

    Walks the actions registered directly on ``parser``. Actions whose
    ``dest`` is ``"help"`` or ``"command"`` and actions that carry a ``const``
    value are exempt from the check.

    Raises:
        ValueError: for the first non-exempt action whose ``type`` is ``None``.
    """
    exempt_dests = ("help", "command")
    for arg in parser._actions:
        must_be_typed = arg.dest not in exempt_dests and arg.const is None
        if must_be_typed and arg.type is None:
            raise ValueError(f"Argument '{arg.dest}' doesn't have a type specified.")


class CLIParser:
    """Main CLI parser class that manages all subcommands.

    Two invocation styles are handled:

    * subcommand mode -- the first argument is a key of
      ``command_instances`` (or there are no arguments at all, in which case
      the main help is printed);
    * legacy mode -- any other non-empty argument list, parsed with the
      evaluate command's arguments on a flat parser without subcommands.
    """

    def __init__(self):
        self.parser = None
        self.subparsers = None
        self.legacy_parser = None
        self.command_instances: Dict[str, SubCommand] = {}
        # Command classes handed to add_command() before the parser exists.
        # setup_parser() instantiates them once the subparsers are available.
        self._pending_commands: Dict[str, Type[SubCommand]] = {}

    def setup_parser(self) -> argparse.ArgumentParser:
        """Build the main parser with its subcommands (only once).

        Returns:
            The main ``argparse.ArgumentParser``; repeated calls return the
            same object.
        """
        if self.parser is not None:
            return self.parser

        self.parser = argparse.ArgumentParser(
            prog="lm-eval",
            description="Language Model Evaluation Harness",
            formatter_class=argparse.RawTextHelpFormatter,
        )

        # Create subparsers
        self.subparsers = self.parser.add_subparsers(
            dest="command", help="Available commands", metavar="COMMAND"
        )

        # Create and register all built-in command instances
        self.command_instances = {
            "evaluate": EvaluateCommand.create(self.subparsers),
            "list": ListCommand.create(self.subparsers),
            "validate": ValidateCommand.create(self.subparsers),
            "cache": CacheCommand.create(self.subparsers),
        }

        # Register commands queued through add_command() before the
        # subparsers existed; without this step they would never be created.
        for name, command_class in self._pending_commands.items():
            self.command_instances[name] = command_class.create(self.subparsers)
        self._pending_commands.clear()

        return self.parser

    def setup_legacy_parser(self) -> argparse.ArgumentParser:
        """Build the flat legacy parser for backward compatibility (only once).

        Returns:
            An ``argparse.ArgumentParser`` carrying the evaluate command's
            arguments without any subcommand structure.
        """
        if self.legacy_parser is not None:
            return self.legacy_parser

        self.legacy_parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter
        )

        # Only the evaluate command's arguments are needed here.
        # object.__new__ yields an instance without running
        # EvaluateCommand.__init__, so the command is not tied to a subparser.
        temp_cmd = object.__new__(EvaluateCommand)
        temp_cmd._add_args(self.legacy_parser)

        return self.legacy_parser

    def parse_args(self, args=None) -> argparse.Namespace:
        """Parse arguments using the main (subcommand) parser.

        Args:
            args: Argument list; ``None`` lets argparse read ``sys.argv``.
        """
        parser = self.setup_parser()
        # check_argument_types walks only the actions registered directly on
        # the parser it is given.
        check_argument_types(parser)
        return parser.parse_args(args)

    def parse_legacy_args(self, args=None) -> argparse.Namespace:
        """Parse arguments using the legacy parser.

        Args:
            args: Argument list; ``None`` lets argparse read ``sys.argv``.

        Raises:
            ValueError: from ``check_argument_types`` when a legacy argument
                has no ``type``.
        """
        parser = self.setup_legacy_parser()
        check_argument_types(parser)
        return parser.parse_args(args)

    def should_use_subcommand_mode(self, argv=None) -> bool:
        """Determine if we should use subcommand mode based on arguments.

        Args:
            argv: Argument list without the program name; defaults to
                ``sys.argv[1:]``.

        Returns:
            ``True`` when ``argv`` is empty or its first element names a
            registered command, ``False`` otherwise (legacy mode).
        """
        if argv is None:
            argv = sys.argv[1:]

        # No arguments: subcommand mode, so that the main help is shown.
        if not argv:
            return True

        # The command table is filled by setup_parser(); build it on demand.
        if not self.command_instances:
            self.setup_parser()

        return argv[0] in self.command_instances

    def execute(self, argv=None) -> None:
        """Main execution method that handles both subcommand and legacy modes.

        Args:
            argv: Argument list without the program name; defaults to
                ``sys.argv[1:]``.

        With an empty argument list (whether passed explicitly or taken from
        ``sys.argv``) the main help is printed and the process exits with
        status 1, instead of parsing and then calling ``args.func`` with no
        subcommand selected.
        """
        # Resolve once so the empty-input check and the parsers all see the
        # same list; argparse itself would fall back to sys.argv[1:] for None.
        if argv is None:
            argv = sys.argv[1:]

        if not self.should_use_subcommand_mode(argv):
            # Use legacy mode for backward compatibility
            args = self.parse_legacy_args(argv)
            self._handle_legacy_mode(args)
            return

        if not argv:
            # No arguments provided, show help
            self.setup_parser().print_help()
            sys.exit(1)

        args = self.parse_args(argv)
        args.func(args)

    def _handle_legacy_mode(self, args: argparse.Namespace) -> None:
        """Handle legacy CLI mode for backward compatibility.

        When ``args.tasks`` is one of the listing keywords the requested
        listing is printed and the process exits with status 0. Otherwise
        the namespace is handed to the evaluate command's ``execute``.
        """
        # Keyword arguments for TaskManager.list_all_tasks per listing keyword.
        listing_options = {
            "list": {},
            "list_groups": {"list_subtasks": False, "list_tags": False},
            "list_subtasks": {"list_groups": False, "list_tags": False},
            "list_tags": {"list_groups": False, "list_subtasks": False},
        }

        requested = getattr(args, "tasks", None)
        # isinstance guard keeps the dict lookup safe for unhashable values.
        if isinstance(requested, str) and requested in listing_options:
            # Imported here rather than at module level, as in the original.
            from lm_eval.tasks import TaskManager

            task_manager = TaskManager(include_path=getattr(args, "include_path", None))
            print(task_manager.list_all_tasks(**listing_options[requested]))
            sys.exit(0)

        # Handle legacy evaluation: reuse the registered instance when there
        # is one, otherwise fall back to a bare instance created without
        # running EvaluateCommand.__init__.
        evaluate_cmd = self.command_instances.get("evaluate")
        if evaluate_cmd is None:
            evaluate_cmd = object.__new__(EvaluateCommand)
        evaluate_cmd.execute(args)

    def add_command(self, name: str, command_class: Type[SubCommand]) -> None:
        """Add a new command to the parser (for extensibility).

        Args:
            name: Key under which the command is stored in
                ``command_instances``.
            command_class: Class whose ``create(subparsers)`` builds the
                command instance.

        If the parser is already set up the command is created immediately;
        otherwise the class is queued and created by ``setup_parser``.
        """
        if self.subparsers is not None:
            self.command_instances[name] = command_class.create(self.subparsers)
        else:
            self._pending_commands[name] = command_class