Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
17172a26
Commit
17172a26
authored
Jan 20, 2024
by
lintangsutawika
Browse files
temp save
parent
81b8e670
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
5 deletions
+14
-5
lm_eval/evaluator.py
lm_eval/evaluator.py
+11
-3
lm_eval/tasks/benchmarks/test.yaml
lm_eval/tasks/benchmarks/test.yaml
+3
-2
No files found.
lm_eval/evaluator.py
View file @
17172a26
...
...
@@ -38,6 +38,7 @@ def simple_evaluate(
write_out
:
bool
=
False
,
log_samples
:
bool
=
True
,
gen_kwargs
:
str
=
None
,
weight_by_size
:
bool
=
False
,
):
"""Instantiate and evaluate a model on a list of tasks.
...
...
@@ -155,6 +156,7 @@ def simple_evaluate(
decontamination_ngrams_path
=
decontamination_ngrams_path
,
write_out
=
write_out
,
log_samples
=
log_samples
,
weight_by_size
=
weight_by_size
,
)
if
lm
.
rank
==
0
:
...
...
@@ -192,6 +194,7 @@ def evaluate(
decontamination_ngrams_path
=
None
,
write_out
:
bool
=
False
,
log_samples
:
bool
=
True
,
weight_by_size
:
bool
=
False
,
):
"""Instantiate and evaluate a model on a list of tasks.
...
...
@@ -474,17 +477,22 @@ def evaluate(
total_size
=
0
for
task
in
task_list
:
print
(
"###"
)
print
(
task
)
print
(
metrics
)
print
(
"###"
)
metrics
=
results
[
task
].
copy
()
if
"alias"
in
metrics
:
metrics
.
pop
(
"alias"
)
current_size
=
metrics
.
pop
(
"samples"
)
# TODO: There should be a way for users
# to toggle between weighted and
# unweighted averaging
# For unweighted averaging, use:
# current_size = 1
if
weight_by_size
:
current_size
=
metrics
.
pop
(
"samples"
)
else
:
current_size
=
1
all_stderr
=
[]
for
metric
in
[
...
...
lm_eval/tasks/benchmarks/test.yaml
View file @
17172a26
...
...
@@ -4,9 +4,10 @@ task:
-
group
:
arc_stuff
task
:
-
arc_challenge
-
glue
-
task
:
arc_easy
metric_list
:
-
metric
:
acc
num_fewshot
:
3
# - task: mmlu_stem
# num_fewshot: 2
\ No newline at end of file
# - task: mmlu
# num_fewshot: 2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment