Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
fbeaa2c1
Commit
fbeaa2c1
authored
Jun 13, 2024
by
Yu Shi Jie
Browse files
Merge branch 'mmlu-pro' of github.com:ysjprojects/lm-evaluation-harness into mmlu-pro
Resolve conflict.
parents
91b2eec6
5c7cba23
Changes
27
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
38 additions
and
28 deletions
+38
-28
.github/workflows/new_tasks.yml
.github/workflows/new_tasks.yml
+2
-2
.github/workflows/unit_tests.yml
.github/workflows/unit_tests.yml
+1
-1
.pre-commit-config.yaml
.pre-commit-config.yaml
+8
-9
lm_eval/api/task.py
lm_eval/api/task.py
+9
-9
lm_eval/filters/decontamination.py
lm_eval/filters/decontamination.py
+0
-1
lm_eval/loggers/evaluation_tracker.py
lm_eval/loggers/evaluation_tracker.py
+1
-1
lm_eval/models/anthropic_llms.py
lm_eval/models/anthropic_llms.py
+1
-1
lm_eval/models/textsynth.py
lm_eval/models/textsynth.py
+2
-1
lm_eval/models/vllm_causallms.py
lm_eval/models/vllm_causallms.py
+4
-1
lm_eval/tasks/aclue/_generate_configs.py
lm_eval/tasks/aclue/_generate_configs.py
+1
-0
lm_eval/tasks/bbh/_generate_configs.py
lm_eval/tasks/bbh/_generate_configs.py
+1
-0
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/_generate_configs.py
+1
-0
lm_eval/tasks/bigbench/push_bigbench_dataset.py
lm_eval/tasks/bigbench/push_bigbench_dataset.py
+1
-0
lm_eval/tasks/ceval/_generate_configs.py
lm_eval/tasks/ceval/_generate_configs.py
+1
-0
lm_eval/tasks/cmmlu/_generate_configs.py
lm_eval/tasks/cmmlu/_generate_configs.py
+1
-0
lm_eval/tasks/csatqa/_generate_configs.py
lm_eval/tasks/csatqa/_generate_configs.py
+1
-0
lm_eval/tasks/fda/task.py
lm_eval/tasks/fda/task.py
+0
-2
lm_eval/tasks/ifeval/instructions.py
lm_eval/tasks/ifeval/instructions.py
+1
-0
lm_eval/tasks/ifeval/instructions_registry.py
lm_eval/tasks/ifeval/instructions_registry.py
+1
-0
lm_eval/tasks/mmlu/_generate_configs.py
lm_eval/tasks/mmlu/_generate_configs.py
+1
-0
No files found.
.github/workflows/new_tasks.yml
View file @
fbeaa2c1
...
...
@@ -20,13 +20,13 @@ jobs:
with
:
fetch-depth
:
2
# OR "2" -> To retrieve the preceding commit.
# Uses the tj-actions/changed-files
@v37
action to check for changes.
# Uses the tj-actions/changed-files action to check for changes.
# Outputs provided here: https://github.com/tj-actions/changed-files#outputs
# The `files_yaml` input optionally takes a yaml string to specify filters,
# and prepends the filter name to the standard output names.
-
name
:
Check task folders
id
:
changed-tasks
uses
:
tj-actions/changed-files@v
37.1
.2
uses
:
tj-actions/changed-files@v
44.5
.2
with
:
# tasks checks the tasks folder and api checks the api folder for changes
files_yaml
:
|
...
...
.github/workflows/unit_tests.yml
View file @
fbeaa2c1
...
...
@@ -32,7 +32,7 @@ jobs:
env
:
SKIP
:
"
no-commit-to-branch,mypy"
uses
:
pre-commit/action@v3.0.
0
uses
:
pre-commit/action@v3.0.
1
# # mypy turned off for now
# - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
...
...
.pre-commit-config.yaml
View file @
fbeaa2c1
...
...
@@ -29,8 +29,7 @@ repos:
-
id
:
mixed-line-ending
args
:
[
--fix=lf
]
-
repo
:
https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev
:
v0.2.2
rev
:
v0.4.8
hooks
:
# Run the linter.
-
id
:
ruff
...
...
@@ -39,7 +38,7 @@ repos:
# Run the formatter.
-
id
:
ruff-format
-
repo
:
https://github.com/codespell-project/codespell
rev
:
v2.
2.6
rev
:
v2.
3.0
hooks
:
-
id
:
codespell
exclude
:
>
...
...
@@ -47,9 +46,9 @@ repos:
.*\.json|ignore.txt|lm_eval/tasks/.*|.*yaml|.*\.ipynb
)$
args
:
[
--check-filenames
,
--check-hidden
,
--ignore-words=ignore.txt
]
-
repo
:
https://github.com/pre-commit/mirrors-mypy
rev
:
v1.5.1
hooks
:
-
id
:
mypy
additional_dependencies
:
[
"
.[sentencepiece,multilingual,promptsource,gptq]"
,
"
types-PyYAML"
,
"
types-requests"
]
exclude
:
^tests/.*$
#
- repo: https://github.com/pre-commit/mirrors-mypy
#
rev: v1.5.1
#
hooks:
#
- id: mypy
#
additional_dependencies: [".[sentencepiece,multilingual,promptsource,gptq]", "types-PyYAML", "types-requests"]
#
exclude: ^tests/.*$
lm_eval/api/task.py
View file @
fbeaa2c1
...
...
@@ -67,9 +67,9 @@ class TaskConfig(dict):
training_split
:
Optional
[
str
]
=
None
validation_split
:
Optional
[
str
]
=
None
test_split
:
Optional
[
str
]
=
None
fewshot_split
:
Optional
[
str
]
=
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?
)
fewshot_split
:
Optional
[
str
]
=
(
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
)
# formatting / prompting options.
# see docs/advanced_task_guide.md for more info
process_docs
:
Optional
[
Callable
]
=
None
...
...
@@ -92,9 +92,9 @@ class TaskConfig(dict):
filter_list
:
Optional
[
Union
[
str
,
list
]]
=
None
should_decontaminate
:
bool
=
False
doc_to_decontamination_query
:
Optional
[
str
]
=
None
metadata
:
Optional
[
dict
]
=
None
# by default, not used in the code. allows for users to pass arbitrary info to tasks
metadata
:
Optional
[
dict
]
=
(
None
# by default, not used in the code. allows for users to pass arbitrary info to tasks
)
def
__post_init__
(
self
)
->
None
:
if
self
.
generation_kwargs
is
not
None
:
...
...
@@ -229,9 +229,9 @@ class Task(abc.ABC):
self
.
_config
:
TaskConfig
=
TaskConfig
({
**
config
})
if
config
else
TaskConfig
()
self
.
_filters
=
[
build_filter_ensemble
(
"none"
,
[[
"take_first"
,
None
]])]
self
.
fewshot_rnd
:
Optional
[
random
.
Random
]
=
None
# purposely induce errors in case of improper usage
self
.
fewshot_rnd
:
Optional
[
random
.
Random
]
=
(
None
# purposely induce errors in case of improper usage
)
def
download
(
self
,
...
...
lm_eval/filters/decontamination.py
View file @
fbeaa2c1
...
...
@@ -4,7 +4,6 @@ from lm_eval.api.registry import register_filter
@
register_filter
(
"decontaminate"
)
class
DecontaminationFilter
(
Filter
):
"""
A filter which evaluates
"""
...
...
lm_eval/loggers/evaluation_tracker.py
View file @
fbeaa2c1
...
...
@@ -259,7 +259,7 @@ class EvaluationTracker:
path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
file_results_samples
=
path
.
joinpath
(
f
"samples_
{
task_name
}
_
{
self
.
date_id
}
.json"
f
"samples_
{
task_name
}
_
{
self
.
date_id
}
.json
l
"
)
for
sample
in
samples
:
...
...
lm_eval/models/anthropic_llms.py
View file @
fbeaa2c1
...
...
@@ -307,7 +307,7 @@ please install anthropic via `pip install 'lm-eval[anthropic]'` or `pip install
# defaults to os.environ.get("ANTHROPIC_API_KEY")
self
.
client
=
anthropic
.
Anthropic
()
self
.
temperature
=
temperature
self
.
max_token
=
max_tokens
self
.
max_token
s
=
max_tokens
self
.
tokenizer
=
self
.
client
.
get_tokenizer
()
self
.
kwargs
=
kwargs
...
...
lm_eval/models/textsynth.py
View file @
fbeaa2c1
"""
TextSynth API
"""TextSynth API
Implementation provided by Fabrice Bellard:
https://github.com/EleutherAI/lm-evaluation-harness/issues/295
...
...
@@ -11,6 +11,7 @@ Example usage:
Homepage: https://textsynth.com/index.html
"""
import
logging
import
os
...
...
lm_eval/models/vllm_causallms.py
View file @
fbeaa2c1
...
...
@@ -499,7 +499,10 @@ class VLLM(TemplateLM):
def
modify_gen_kwargs
(
kwargs
:
dict
)
->
dict
:
# sampling_params
do_sample
=
kwargs
.
pop
(
"do_sample"
,
None
)
if
do_sample
is
False
or
"temperature"
not
in
kwargs
:
if
do_sample
is
False
and
"temperature"
not
in
kwargs
:
eval_logger
.
debug
(
"Got `do_sample=False` and no temperature value, setting VLLM temperature to 0.0 ..."
)
kwargs
[
"temperature"
]
=
0.0
# hf defaults
kwargs
[
"skip_special_tokens"
]
=
kwargs
.
get
(
"skip_special_tokens"
,
False
)
...
...
lm_eval/tasks/aclue/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
...
...
lm_eval/tasks/bbh/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
import
re
...
...
lm_eval/tasks/belebele/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
...
...
lm_eval/tasks/bigbench/push_bigbench_dataset.py
View file @
fbeaa2c1
...
...
@@ -8,6 +8,7 @@ Requires the installation of
`pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
and is included so that the bigbench dependency can be avoided.
"""
import
bigbench.api.util
as
bb_utils
import
datasets
from
tqdm
import
tqdm
...
...
lm_eval/tasks/ceval/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
...
...
lm_eval/tasks/cmmlu/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
...
...
lm_eval/tasks/csatqa/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all other splits with this YAML
"""
import
argparse
import
os
...
...
lm_eval/tasks/fda/task.py
View file @
fbeaa2c1
"""
"""
import
re
from
typing
import
List
...
...
lm_eval/tasks/ifeval/instructions.py
View file @
fbeaa2c1
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
"""Library of instructions."""
import
collections
import
json
import
logging
...
...
lm_eval/tasks/ifeval/instructions_registry.py
View file @
fbeaa2c1
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
"""Registry of all instructions."""
from
lm_eval.tasks.ifeval
import
instructions
...
...
lm_eval/tasks/mmlu/_generate_configs.py
View file @
fbeaa2c1
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import
argparse
import
logging
import
os
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment