Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b7d3f0dd
Commit
b7d3f0dd
authored
Jul 04, 2025
by
Baber
Browse files
cleanup
parent
9de93651
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
24 additions
and
22 deletions
+24
-22
lm_eval/__main__.py
lm_eval/__main__.py
+2
-2
lm_eval/_cli/__init__.py
lm_eval/_cli/__init__.py
+7
-7
lm_eval/_cli/eval.py
lm_eval/_cli/eval.py
+6
-6
lm_eval/_cli/list.py
lm_eval/_cli/list.py
+2
-1
lm_eval/_cli/run.py
lm_eval/_cli/run.py
+4
-4
lm_eval/_cli/validate.py
lm_eval/_cli/validate.py
+3
-2
No files found.
lm_eval/__main__.py
View file @
b7d3f0dd
from
lm_eval._cli
import
CLIParser
from
lm_eval._cli
import
Eval
def
cli_evaluate
()
->
None
:
def
cli_evaluate
()
->
None
:
"""Main CLI entry point with subcommand and legacy support."""
"""Main CLI entry point with subcommand and legacy support."""
parser
=
CLIParser
()
parser
=
Eval
()
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
parser
.
execute
(
args
)
parser
.
execute
(
args
)
...
...
lm_eval/_cli/__init__.py
View file @
b7d3f0dd
"""
"""
CLI subcommands
for the Language Model Evaluation Harness
.
CLI subcommands
to run from terminal
.
"""
"""
from
lm_eval._cli.base
import
SubCommand
from
lm_eval._cli.base
import
SubCommand
from
lm_eval._cli.
cli
import
CLIParser
from
lm_eval._cli.
eval
import
Eval
from
lm_eval._cli.list
import
List
Command
from
lm_eval._cli.list
import
List
from
lm_eval._cli.run
import
Run
from
lm_eval._cli.run
import
Run
from
lm_eval._cli.validate
import
Validate
Command
from
lm_eval._cli.validate
import
Validate
__all__
=
[
__all__
=
[
"SubCommand"
,
"SubCommand"
,
"Run"
,
"Run"
,
"List
Command
"
,
"List"
,
"Validate
Command
"
,
"Validate"
,
"
CLIParser
"
,
"
Eval
"
,
]
]
lm_eval/_cli/
cli
.py
→
lm_eval/_cli/
eval
.py
View file @
b7d3f0dd
...
@@ -2,13 +2,13 @@ import argparse
...
@@ -2,13 +2,13 @@ import argparse
import
sys
import
sys
import
textwrap
import
textwrap
from
lm_eval._cli.list
import
List
Command
from
lm_eval._cli.list
import
List
from
lm_eval._cli.run
import
Run
from
lm_eval._cli.run
import
Run
from
lm_eval._cli.validate
import
Validate
Command
from
lm_eval._cli.validate
import
Validate
class
CLIParser
:
class
Eval
:
"""Main CLI parser
class
that manages all subcommands."""
"""Main CLI parser that manages all subcommands."""
def
__init__
(
self
):
def
__init__
(
self
):
self
.
_parser
=
argparse
.
ArgumentParser
(
self
.
_parser
=
argparse
.
ArgumentParser
(
...
@@ -45,8 +45,8 @@ class CLIParser:
...
@@ -45,8 +45,8 @@ class CLIParser:
dest
=
"command"
,
help
=
"Available commands"
,
metavar
=
"COMMAND"
dest
=
"command"
,
help
=
"Available commands"
,
metavar
=
"COMMAND"
)
)
Run
.
create
(
self
.
_subparsers
)
Run
.
create
(
self
.
_subparsers
)
List
Command
.
create
(
self
.
_subparsers
)
List
.
create
(
self
.
_subparsers
)
Validate
Command
.
create
(
self
.
_subparsers
)
Validate
.
create
(
self
.
_subparsers
)
def
parse_args
(
self
)
->
argparse
.
Namespace
:
def
parse_args
(
self
)
->
argparse
.
Namespace
:
"""Parse arguments using the main parser."""
"""Parse arguments using the main parser."""
...
...
lm_eval/_cli/list.py
View file @
b7d3f0dd
...
@@ -4,7 +4,7 @@ import textwrap
...
@@ -4,7 +4,7 @@ import textwrap
from
lm_eval._cli.base
import
SubCommand
from
lm_eval._cli.base
import
SubCommand
class
List
Command
(
SubCommand
):
class
List
(
SubCommand
):
"""Command for listing available tasks."""
"""Command for listing available tasks."""
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
...
@@ -14,6 +14,7 @@ class ListCommand(SubCommand):
...
@@ -14,6 +14,7 @@ class ListCommand(SubCommand):
"list"
,
"list"
,
help
=
"List available tasks, groups, subtasks, or tags"
,
help
=
"List available tasks, groups, subtasks, or tags"
,
description
=
"List available tasks, groups, subtasks, or tags from the evaluation harness."
,
description
=
"List available tasks, groups, subtasks, or tags from the evaluation harness."
,
usage
=
"lm-eval list [tasks|groups|subtasks|tags] [--include_path DIR]"
,
epilog
=
textwrap
.
dedent
(
"""
epilog
=
textwrap
.
dedent
(
"""
examples:
examples:
# List all available tasks (includes groups, subtasks, and tags)
# List all available tasks (includes groups, subtasks, and tags)
...
...
lm_eval/_cli/run.py
View file @
b7d3f0dd
...
@@ -17,12 +17,12 @@ class Run(SubCommand):
...
@@ -17,12 +17,12 @@ class Run(SubCommand):
"""Command for running language model evaluation."""
"""Command for running language model evaluation."""
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
# Create and configure the parser
super
().
__init__
(
*
args
,
**
kwargs
)
super
().
__init__
(
*
args
,
**
kwargs
)
self
.
_parser
=
subparsers
.
add_parser
(
self
.
_parser
=
subparsers
.
add_parser
(
"run"
,
"run"
,
help
=
"Run
language model evaluation
"
,
help
=
"Run
the evaluation harness on specified tasks
"
,
description
=
"Evaluate language models on various benchmarks and tasks."
,
description
=
"Evaluate language models on various benchmarks and tasks."
,
usage
=
"lm-eval run --model <model> --tasks <task1,task2,...> [options]"
,
epilog
=
textwrap
.
dedent
(
"""
epilog
=
textwrap
.
dedent
(
"""
examples:
examples:
# Basic evaluation with HuggingFace model
# Basic evaluation with HuggingFace model
...
@@ -42,7 +42,7 @@ class Run(SubCommand):
...
@@ -42,7 +42,7 @@ class Run(SubCommand):
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
)
)
self
.
_add_args
()
self
.
_add_args
()
self
.
_parser
.
set_defaults
(
func
=
lambda
arg
s
:
self
.
_parser
.
print_help
())
self
.
_parser
.
set_defaults
(
func
=
lambda
arg
:
self
.
_parser
.
print_help
())
def
_add_args
(
self
)
->
None
:
def
_add_args
(
self
)
->
None
:
self
.
_parser
=
self
.
_parser
self
.
_parser
=
self
.
_parser
...
@@ -281,7 +281,7 @@ class Run(SubCommand):
...
@@ -281,7 +281,7 @@ class Run(SubCommand):
)
)
def
execute
(
self
,
args
:
argparse
.
Namespace
)
->
None
:
def
execute
(
self
,
args
:
argparse
.
Namespace
)
->
None
:
"""
Execute
the evaluation
command
."""
"""
Runs
the evaluation
harness with the provided arguments
."""
from
lm_eval.config.evaluate_config
import
EvaluatorConfig
from
lm_eval.config.evaluate_config
import
EvaluatorConfig
# Create and validate config (most validation now happens in EvaluationConfig)
# Create and validate config (most validation now happens in EvaluationConfig)
...
...
lm_eval/_cli/validate.py
View file @
b7d3f0dd
...
@@ -5,7 +5,7 @@ import textwrap
...
@@ -5,7 +5,7 @@ import textwrap
from
lm_eval._cli.base
import
SubCommand
from
lm_eval._cli.base
import
SubCommand
class
Validate
Command
(
SubCommand
):
class
Validate
(
SubCommand
):
"""Command for validating tasks."""
"""Command for validating tasks."""
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
subparsers
:
argparse
.
_SubParsersAction
,
*
args
,
**
kwargs
):
...
@@ -15,6 +15,7 @@ class ValidateCommand(SubCommand):
...
@@ -15,6 +15,7 @@ class ValidateCommand(SubCommand):
"validate"
,
"validate"
,
help
=
"Validate task configurations"
,
help
=
"Validate task configurations"
,
description
=
"Validate task configurations and check for errors."
,
description
=
"Validate task configurations and check for errors."
,
usage
=
"lm-eval validate --tasks <task1,task2> [--include_path DIR]"
,
epilog
=
textwrap
.
dedent
(
"""
epilog
=
textwrap
.
dedent
(
"""
examples:
examples:
# Validate a single task
# Validate a single task
...
@@ -72,7 +73,7 @@ class ValidateCommand(SubCommand):
...
@@ -72,7 +73,7 @@ class ValidateCommand(SubCommand):
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
)
)
self
.
_add_args
()
self
.
_add_args
()
self
.
_parser
.
set_defaults
(
func
=
lambda
arg
s
:
self
.
_parser
.
print_help
())
self
.
_parser
.
set_defaults
(
func
=
lambda
arg
:
self
.
_parser
.
print_help
())
def
_add_args
(
self
)
->
None
:
def
_add_args
(
self
)
->
None
:
self
.
_parser
.
add_argument
(
self
.
_parser
.
add_argument
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment