gaoqiong / lm-evaluation-harness · Commits · bcd6faaa

Commit bcd6faaa authored Sep 26, 2025 by Baber

fixup! merge

parent 58fc07b1
Showing 4 changed files with 43 additions and 60 deletions
lm_eval/api/group.py                +0  -5
lm_eval/config/evaluate_config.py   +19 -17
lm_eval/evaluator.py                +1  -0
pyproject.toml                      +23 -38
lm_eval/api/group.py

```diff
@@ -106,10 +106,5 @@ class GroupConfig:
         except (TypeError, OSError):
             return str(value)

-    @property
-    def version(self) -> str:
-        """Returns the version of the group configuration."""
-        return self.metadata.get("version", "1.0")
-
     def __repr__(self):
         return f"GroupConfig(group={self.group},group_alias={self.group_alias})"
```
lm_eval/config/evaluate_config.py

```diff
@@ -340,23 +340,25 @@ class EvaluatorConfig:
                 metadata=self.metadata if self.metadata else {},
             )
         )
-        task_names = task_manager.match_tasks(self.tasks)
-        # Check for any individual task files in the list
-        for task in [task for task in self.tasks if task not in task_names]:
-            task_path = Path(task)
-            if task_path.is_file():
-                config = utils.load_yaml_config(str(task_path))
-                task_names.append(config)
-
-        # Check for missing tasks
-        task_missing = [
-            task for task in self.tasks if task not in task_names and "*" not in task
-        ]
-
-        if task_missing:
-            missing = ", ".join(task_missing)
-            raise ValueError(f"Tasks not found: {missing}")
+        task_names = self.tasks
+        # TODO: FIX TASKS VALIDATION!!!
+        # task_names = task_manager.match_tasks(self.tasks)
+        # # Check for any individual task files in the list
+        # for task in [task for task in self.tasks if task not in task_names]:
+        #     task_path = Path(task)
+        #     if task_path.is_file():
+        #         config = utils.load_yaml_config(str(task_path))
+        #         task_names.append(config)
+        #
+        # # Check for missing tasks
+        # task_missing = [
+        #     task for task in self.tasks if task not in task_names and "*" not in task
+        # ]
+        #
+        # if task_missing:
+        #     missing = ", ".join(task_missing)
+        #     raise ValueError(f"Tasks not found: {missing}")
         # Update tasks with resolved names
         self.tasks = task_names
```
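The net effect is that `self.tasks` now passes through unvalidated; the old resolution path (wildcard matching, loading individual YAML task files, erroring on unknown names) survives only as the commented block behind the TODO. A standalone sketch of the disabled behavior, with `fnmatch` standing in for the harness's own `match_tasks` helper, whose internals are not shown in this diff:

```python
import fnmatch
from pathlib import Path
from typing import List


def resolve_tasks(requested: List[str], registered: List[str]) -> List[str]:
    """Sketch of the validation this commit disables (not the harness's code)."""
    # Expand registered task names against the request, honoring "*" wildcards.
    task_names = [
        name for name in registered
        if any(fnmatch.fnmatch(name, pattern) for pattern in requested)
    ]
    # Any unmatched entry that is an existing file is treated as a YAML task
    # config; the real code loads it via utils.load_yaml_config().
    for task in [t for t in requested if t not in task_names]:
        if Path(task).is_file():
            task_names.append(task)
    # Everything still unmatched (and not a wildcard pattern) is an error.
    missing = [
        t for t in requested
        if t not in task_names and "*" not in t and not Path(t).is_file()
    ]
    if missing:
        raise ValueError(f"Tasks not found: {', '.join(missing)}")
    return task_names
```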
lm_eval/evaluator.py

```diff
@@ -30,6 +30,7 @@ from lm_eval.evaluator_utils import (
 from lm_eval.loggers import EvaluationTracker
 from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_commit_hash
 from lm_eval.tasks import TaskManager
+from lm_eval.tasks.manager import get_task_dict
 from lm_eval.utils import (
     get_logger,
     handle_non_serializable,
```
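The single added import suggests task-dict construction now goes through `lm_eval.tasks.manager`. A hedged usage sketch; only the import path is confirmed by the diff, and the call signature is assumed from the harness's long-standing public `get_task_dict(tasks, task_manager)` API:

```python
from lm_eval.tasks import TaskManager
from lm_eval.tasks.manager import get_task_dict

# Assumed usage: resolve task names into instantiated task objects keyed by name.
task_manager = TaskManager()
task_dict = get_task_dict(["lambada_openai"], task_manager)
print(sorted(task_dict))
```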
pyproject.toml

```diff
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "lm_eval"
 version = "0.4.9.1"
 authors = [
-    {name = "EleutherAI", email = "contact@eleuther.ai"}
+    { name = "EleutherAI", email = "contact@eleuther.ai" }
 ]
 description = "A framework for evaluating language models"
 readme = "README.md"
@@ -19,25 +19,22 @@ classifiers = [
 requires-python = ">=3.9"
 license = { "text" = "MIT" }
 dependencies = [
     "accelerate>=0.26.0",
     "datasets>=2.16.0,<4.0",
     "evaluate>=0.4.0",
     "peft>=0.2.0",
     "pytablewriter",
     "rouge-score>=0.0.4",
     "sacrebleu>=1.5.0",
     "scikit-learn>=0.24.1",
     "sqlitedict",
     "torch>=1.8",
     "transformers>=4.1",
     "dill",
     "word2number",
     "more_itertools"
 ]

-[tool.setuptools.packages.find]
-include = ["lm_eval*"]
-
 # required to include yaml files in pip installation
 [tool.setuptools.package-data]
 lm_eval = ["**/*.yaml", "tasks/**/*"]
```
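The `[tool.setuptools.package-data]` globs are what get the task YAMLs into wheels, as the inline comment says; the other change in this hunk drops a duplicate `[tool.setuptools.packages.find]` table (the copy at the end of the file survives, see the last hunk below). A small sketch to confirm the YAMLs actually ship in an installed copy; the `tasks/` layout is taken from the glob, nothing else is guaranteed:

```python
from importlib import resources


def count_yaml(node) -> int:
    # Recursively count *.yaml files under an importlib.resources Traversable
    # (Python 3.9+ API, matching this project's requires-python).
    if node.is_file():
        return int(node.name.endswith(".yaml"))
    return sum(count_yaml(child) for child in node.iterdir())


# Requires an installed lm_eval; a count of zero would mean the
# package-data globs above did not take effect.
print(count_yaml(resources.files("lm_eval") / "tasks"))
```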
```diff
@@ -63,7 +60,7 @@ ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
 ipex = ["optimum"]
 japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
 longbench = ["jieba", "fuzzywuzzy", "rouge"]
 libra = ["pymorphy2"]
 mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"]
 math = ["sympy>=1.12", "antlr4-python3-runtime==4.11", "math_verify[antlr4_11_0]"]
 multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
@@ -76,16 +73,16 @@ sentencepiece = ["sentencepiece>=0.1.98"]
 sparsify = ["sparsify"]
 discrim_eval = ["statsmodels==0.14.4"]
 tasks = [
     "lm_eval[acpbench]",
     "lm_eval[discrim_eval]",
     "lm_eval[ifeval]",
     "lm_eval[japanese_leaderboard]",
     "lm_eval[longbench]",
     "lm_eval[libra]",
     "lm_eval[mamba]",
     "lm_eval[math]",
     "lm_eval[multilingual]",
     "lm_eval[ruler]"
 ]
 testing = ["pytest", "pytest-cov", "pytest-xdist"]
 unitxt = ["unitxt==1.22.0"]
@@ -93,14 +90,6 @@ vllm = ["vllm>=0.4.2"]
 wandb = ["wandb>=0.16.3", "pandas", "numpy"]
 zeno = ["pandas", "zeno-client"]

-[project.scripts]
-lm-eval = "lm_eval.__main__:cli_evaluate"
-lm_eval = "lm_eval.__main__:cli_evaluate"
-
-[project.urls]
-Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
-Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
-
 [tool.pymarkdown]
 plugins.md013.enabled = false  # line-length
 plugins.md024.allow_different_nesting = true  # no-duplicate-headers
```

(The hunks at -63 and -76 show no visible text differences; their change counts come from whitespace-only edits, which the page's "Hide whitespace changes" toggle would suppress.)
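Given this is a merge fixup, the deleted `[project.scripts]` and `[project.urls]` tables were presumably second copies introduced by the merge; TOML forbids declaring the same table twice, so a duplicated table makes the whole pyproject.toml unparseable. A quick demonstration (`tomllib` is stdlib from Python 3.11):

```python
import tomllib  # stdlib TOML parser, Python 3.11+

duplicated = """
[project.scripts]
lm-eval = "lm_eval.__main__:cli_evaluate"

[project.scripts]
lm_eval = "lm_eval.__main__:cli_evaluate"
"""

try:
    tomllib.loads(duplicated)
except tomllib.TOMLDecodeError as err:
    print(err)  # e.g. "Cannot declare ('project', 'scripts') twice"
```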
```diff
@@ -123,9 +112,5 @@ combine-as-imports = true
 known-first-party = ["lm_eval"]
 lines-after-imports = 2

-# required to include yaml files in pip installation
-[tool.setuptools.package-data]
-lm_eval = ["**/*.yaml", "tasks/**/*"]
-
 [tool.setuptools.packages.find]
 include = ["lm_eval*"]
```