gaoqiong / lm-evaluation-harness · Commits

Commit 50e99bd7
Authored Sep 20, 2023 by Herbie Bradley

Merge remote-tracking branch 'origin/big-refactor' into calibration

Parents: 3d4c4cd6, a3252ed7
Changes: 49. Showing 9 changed files on this page, with 107 additions and 93 deletions (+107 −93).
Files in this view:

lm_eval/tasks/translation/utils.py    +1   -1
lm_eval/utils.py                      +10  -10
main.py                               +2   -3
mypy.ini                              +1   -1
pyproject.toml                        +81  -0
scripts/write_out.py                  +6   -2
setup.py                              +2   -74
tests/test_evaluator.py               +2   -1
tests/utils.py                        +2   -1
lm_eval/tasks/translation/utils.py

@@ -10,7 +10,7 @@ try:
 except ModuleNotFoundError:
     raise Exception(
-        "`pycountry` is required for generating translation task prompt templates. \
-please install pycountry via pip install lm-eval[multilingua] or pip install -e .[multilingual]",
+        "`pycountry` is required for generating translation task prompt templates. \
+please install pycountry via pip install lm-eval[multilingual] or pip install -e .[multilingual]",
     )
lm_eval/utils.py

@@ -16,7 +16,6 @@ import gc
 import torch
 import transformers
-from omegaconf import OmegaConf
 from jinja2 import BaseLoader, Environment, StrictUndefined
 from itertools import islice
@@ -55,8 +54,8 @@ def simple_parse_args_string(args_string):
     args_string = args_string.strip()
     if not args_string:
         return {}
-    arg_list = args_string.split(",")
-    args_dict = OmegaConf.to_object(OmegaConf.from_dotlist(arg_list))
+    arg_list = [arg for arg in args_string.split(",") if arg]
+    args_dict = {k: v for k, v in [arg.split("=") for arg in arg_list]}
     return args_dict
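For context on this hunk: the refactor drops the OmegaConf dependency and parses argument strings with plain string splitting. Below is a minimal standalone sketch of the new behaviour, not the harness's exact code; the example input is made up.

def simple_parse_args_string(args_string: str) -> dict:
    # Strip surrounding whitespace; an empty string yields an empty dict.
    args_string = args_string.strip()
    if not args_string:
        return {}
    # Drop empty segments so a trailing comma is tolerated.
    arg_list = [arg for arg in args_string.split(",") if arg]
    # Each segment is expected to look like "key=value"; values stay strings.
    return {k: v for k, v in [arg.split("=") for arg in arg_list]}

print(simple_parse_args_string("pretrained=gpt2,dtype=float32,"))
# {'pretrained': 'gpt2', 'dtype': 'float32'}

One likely behavioural difference: OmegaConf's dotlist parsing coerces numbers and booleans, whereas the plain split keeps every value as a string.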
@@ -267,9 +266,9 @@ def make_table(result_dict, column: str = "results"):
     from pytablewriter import MarkdownTableWriter, LatexTableWriter

     if column == "results":
-        column_name = "Task"
-    elif column == "aggregate":
-        column_name = "Benchmark"
+        column_name = "Tasks"
+    elif column == "groups":
+        column_name = "Groups"
     md_writer = MarkdownTableWriter()
     latex_writer = LatexTableWriter()
@@ -395,8 +394,10 @@ def import_function(loader, node):
     function_name = loader.construct_scalar(node)
     yaml_path = os.path.dirname(loader.name)
-    module_name, function_name = function_name.split(".")
-    module_path = os.path.join(yaml_path, "{}.py".format(module_name))
+    *module_name, function_name = function_name.split(".")
+    if type(module_name) == list:
+        module_name = ".".join(module_name)
+    module_path = os.path.normpath(os.path.join(yaml_path, "{}.py".format(module_name)))
     spec = importlib.util.spec_from_file_location(module_name, module_path)
     module = importlib.util.module_from_spec(spec)
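The switch to extended unpacking lets a function reference contain dots in the module part. A small illustration of the unpacking itself; the dotted name below is a hypothetical example, not taken from the repo.

# Everything before the last dot is collected into a list; the final
# component becomes the function name.
*module_name, function_name = "custom_utils.metrics.process_docs".split(".")
print(module_name)    # ['custom_utils', 'metrics']
print(function_name)  # 'process_docs'

# The list is then re-joined before the module path is built:
module_name = ".".join(module_name)  # 'custom_utils.metrics'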
@@ -430,8 +431,7 @@ def load_yaml_config(yaml_path):
     # If not found, assume the included yaml
     # is in the same dir as the original yaml
     if not os.path.isfile(path):
-        path = os.path.join(yaml_dir, path)
+        path = os.path.normpath(os.path.join(yaml_dir, path))
     try:
         included_yaml_config = load_yaml_config(path)
         final_yaml_config.update(included_yaml_config)
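Wrapping the join in os.path.normpath matters when an included YAML uses a relative path with ".." segments. A quick sketch; the directory and file names below are hypothetical.

import os

yaml_dir = "lm_eval/tasks/some_task"
include = "../_templates/default.yaml"
print(os.path.join(yaml_dir, include))
# lm_eval/tasks/some_task/../_templates/default.yaml
print(os.path.normpath(os.path.join(yaml_dir, include)))
# lm_eval/tasks/_templates/default.yaml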
main.py

@@ -11,7 +11,6 @@ from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger, SPACING
 from lm_eval.tasks import include_task_folder
-from lm_eval.benchmarks import include_benchmarks

 os.environ["TOKENIZERS_PARALLELISM"] = "false"

@@ -209,8 +208,8 @@ def main() -> None:
         f"batch_size: {args.batch_size}{f' ({batch_sizes})' if batch_sizes else ''}"
     )
     print(evaluator.make_table(results))
-    if "aggregate" in results:
-        print(evaluator.make_table(results, "aggregate"))
+    if "groups" in results:
+        print(evaluator.make_table(results, "groups"))

 if __name__ == "__main__":
mypy.ini

 [mypy]
-python_version = 3.9
+python_version = 3.8
 show_traceback = True
 check_untyped_defs = True
 no_implicit_reexport = True
pyproject.toml

[build-system]
requires = ["setuptools>=40.8.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "lm_eval"
version = "1.0.0"
authors = [{name="EleutherAI", email="contact@eleuther.ai"}]
description = "A framework for evaluating language models"
readme = "README.md"
classifiers = [
    "Development Status :: 3 - Alpha",
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
requires-python = ">=3.8"
license = { "text" = "MIT" }
dependencies = [
    "accelerate>=0.21.0",
    "evaluate",
    "datasets>=2.0.0",
    "evaluate>=0.4.0",
    "jsonlines",
    "numexpr",
    "peft>=0.2.0",
    "pybind11>=2.6.2",
    "pytablewriter",
    "rouge-score>=0.0.4",
    "sacrebleu>=1.5.0",
    "scikit-learn>=0.24.1",
    "sqlitedict",
    "torch>=1.8",
    "tqdm-multiprocess",
    "transformers>=4.1",
    "zstandard",
]

[tool.setuptools]
packages = ["lm_eval"]

# required to include yaml files in pip installation
[tool.setuptools.package-data]
lm_eval = ["**/*.yaml", "tasks/**/*"]
examples = ["**/*.yaml"]

[project.scripts]
lm-eval = "main:main"
lm_eval = "main:main"

[project.urls]
Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
Repository = "https://github.com/EleutherAI/lm-evaluation-harness"

[project.optional-dependencies]
dev = ["black", "flake8", "pre-commit", "pytest", "pytest-cov"]
linting = ["flake8", "pylint", "mypy", "pre-commit"]
testing = ["pytest", "pytest-cov", "pytest-xdist"]
multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
sentencepiece = ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
promptsource = ["promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"]
gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
anthropic = ["anthropic"]
openai = ["openai", "tiktoken"]
all = [
    "lm_eval[dev]",
    "lm_eval[testing]",
    "lm_eval[linting]",
    "lm_eval[multilingual]",
    "lm_eval[sentencepiece]",
    "lm_eval[promptsource]",
    "lm_eval[gptq]",
    "lm_eval[anthropic]",
    "lm_eval[openai]",
]
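As a quick sanity check of the new metadata, the file can be parsed with the standard-library TOML reader. A small sketch assuming Python 3.11+ and that it is run from the repository root.

import tomllib  # standard library on Python 3.11+

with open("pyproject.toml", "rb") as f:
    meta = tomllib.load(f)

print(meta["project"]["requires-python"])                # >=3.8
print(sorted(meta["project"]["optional-dependencies"]))  # ['all', 'anthropic', 'dev', ...]
print(meta["project"]["scripts"])                        # {'lm-eval': 'main:main', 'lm_eval': 'main:main'}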
scripts/write_out.py

@@ -38,17 +38,21 @@ def main():
         iters = []

         for set in args.sets.split(","):
+            docs = None
             if set == "train" and task.has_training_docs():
                 docs = task.training_docs()
             if set == "val" and task.has_validation_docs():
                 docs = task.validation_docs()
             if set == "test" and task.has_test_docs():
                 docs = task.test_docs()
-            iters.append(docs)
+            if docs is not None:
+                iters.append(docs)

         docs = join_iters(iters)

-        with open(os.path.join(args.output_base_path, task_name), "w") as f:
+        with open(os.path.join(args.output_base_path, task_name), "w", encoding="utf8") as f:
             for i, doc in (
                 zip(range(args.num_examples), docs)
                 if args.num_examples > 0
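The last context lines of this hunk rely on zip(range(n), docs) to cap output at num_examples documents, since zip stops at the shorter iterable. A tiny illustration with made-up data:

docs = iter(["doc0", "doc1", "doc2", "doc3"])
num_examples = 2
print([doc for _, doc in zip(range(num_examples), docs)])
# ['doc0', 'doc1']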
setup.py

 import setuptools
-import itertools
-
-with open("README.md", "r", encoding="utf-8") as fh:
-    long_description = fh.read()
+
+# This is to make sure that the package supports editable installs
+setuptools.setup()
-
-extras_require = {
-    "dev": ["black", "flake8", "pre-commit", "pytest", "pytest-cov"],
-    "linting": ["flake8", "pylint", "mypy", "pre-commit"],
-    "testing": ["pytest", "pytest-cov", "pytest-xdist"],
-    "multilingual": ["nagisa>=0.2.7", "jieba>=0.42.1"],
-    "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1", "pycountry"],
-    "promptsource": [
-        "promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
-    ],
-    "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
-    "anthropic": ["anthropic"],
-    "openai": ["openai", "tiktoken"],
-}
-extras_require["all"] = list(itertools.chain.from_iterable(extras_require.values()))
-
-setuptools.setup(
-    name="lm_eval",
-    version="1.0.0",
-    author="EleutherAI",
-    author_email="contact@eleuther.ai",
-    description="A framework for evaluating language models",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/EleutherAI/lm-evaluation-harness",
-    packages=setuptools.find_packages(),
-    # required to include yaml files in pip installation
-    package_data={
-        "lm_eval": ["**/*.yaml", "tasks/**/*"],
-        "examples": ["**/*.yaml"],
-    },
-    entry_points={
-        "console_scripts": ["lm-eval = main:main", "lm_eval = main:main"],
-    },
-    include_package_data=True,
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.9",
-    install_requires=[
-        "accelerate>=0.21.0",
-        "evaluate",
-        "datasets>=2.0.0",
-        "evaluate>=0.4.0",
-        "jsonlines",
-        "numexpr",
-        "omegaconf>=2.2",
-        "peft>=0.2.0",
-        "pybind11>=2.6.2",
-        "pytablewriter",
-        "rouge-score>=0.0.4",
-        "sacrebleu>=1.5.0",
-        "scikit-learn>=0.24.1",
-        "sqlitedict",
-        "torch>=1.8",
-        "tqdm-multiprocess",
-        "transformers>=4.1",
-        "zstandard",
-    ],
-    extras_require=extras_require,
-)
tests/test_evaluator.py

@@ -7,6 +7,7 @@ import lm_eval.tasks as tasks
 # import lm_eval.models as models
 import lm_eval.api as api
 import lm_eval.evaluator as evaluator
+from typing import List

 import random
 import pytest

@@ -26,7 +27,7 @@ import pytest
         )
     ],
 )
-def test_evaluator(task_name: list[str], limit: int, model: str, model_args: str):
+def test_evaluator(task_name: List[str], limit: int, model: str, model_args: str):
     task_name = task_name
     limit = 10
tests/utils.py

@@ -9,6 +9,7 @@ import os
 # This is the path where the output for the changed files for the tasks folder is stored
 # FILE_PATH = file_path = ".github/outputs/tasks_all_changed_and_modified_files.txt"

 # reads a text file and returns a list of words
 # used to read the output of the changed txt from tj-actions/changed-files
 def load_changed_files(file_path: str) -> List[str]:

@@ -32,7 +33,7 @@ def parser(full_path: List[str]) -> List[str]:
     return list(_output)

-def new_tasks() -> Union[list[str], None]:
+def new_tasks() -> Union[List[str], None]:
     FILENAME = ".github/outputs/tasks_all_changed_and_modified_files.txt"
     if os.path.exists(FILENAME):
         # If tasks folder has changed then we get the list of files from FILENAME
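Both test files swap the built-in generic list[str] for typing.List[str]. This matters because subscripting the built-in list type only works on Python 3.9+, while the project now targets Python 3.8 (see the mypy.ini and pyproject.toml changes above). A short illustration; the stub function is hypothetical and only mirrors the annotated signature.

from typing import List, Union

# On Python 3.8:
#   list[str]                -> TypeError: 'type' object is not subscriptable
#   List[str]                -> works
#   Union[List[str], None]   -> equivalent to Optional[List[str]]

def new_tasks_stub() -> Union[List[str], None]:
    # Hypothetical stub demonstrating the 3.8-compatible annotation.
    return None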