Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
79545adb
Commit
79545adb
authored
Jun 11, 2023
by
Benjamin Fattori
Browse files
Merge remote-tracking branch 'upstream/big-refactor' into seq2seq-refactor
parents
eb7b9095
761f0087
Changes
64
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
15 deletions
+22
-15
lm_eval/tasks/wikitext/preprocess_wikitext.py
lm_eval/tasks/wikitext/preprocess_wikitext.py
+1
-0
lm_eval/tasks/wikitext/wikitext.yaml
lm_eval/tasks/wikitext/wikitext.yaml
+1
-7
lm_eval/utils.py
lm_eval/utils.py
+17
-7
main.py
main.py
+3
-1
No files found.
lm_eval/tasks/wikitext/preprocess_wikitext.py
View file @
79545adb
import
re
def
wikitext_detokenizer
(
doc
):
string
=
doc
[
"page"
]
# contractions
...
...
lm_eval/tasks/wikitext/wikitext.yaml
View file @
79545adb
group
:
-
wikitext_group
task
:
wikitext_yaml
task
:
default
dataset_path
:
EleutherAI/wikitext_document_level
dataset_name
:
wikitext-2-raw-v1
output_type
:
loglikelihood_rolling
...
...
@@ -14,11 +14,5 @@ should_decontaminate: true
doc_to_decontamination_query
:
"
{{page}}"
metric_list
:
-
metric
:
word_perplexity
aggregation
:
weighted_perplexity
higher_is_better
:
false
-
metric
:
byte_perplexity
aggregation
:
weighted_perplexity
higher_is_better
:
false
-
metric
:
bits_per_byte
aggregation
:
bits_per_byte
higher_is_better
:
false
\ No newline at end of file
lm_eval/utils.py
View file @
79545adb
...
...
@@ -157,22 +157,32 @@ def make_table(result_dict):
md_writer
=
MarkdownTableWriter
()
latex_writer
=
LatexTableWriter
()
md_writer
.
headers
=
[
"Task"
,
"Version"
,
"Metric"
,
"Value"
,
""
,
"Stderr"
]
latex_writer
.
headers
=
[
"Task"
,
"Version"
,
"Metric"
,
"Value"
,
""
,
"Stderr"
]
md_writer
.
headers
=
[
"Task"
,
"Version"
,
"Filter"
,
"Metric"
,
"Value"
,
""
,
"Stderr"
]
latex_writer
.
headers
=
[
"Task"
,
"Version"
,
"Filter"
,
"Metric"
,
"Value"
,
""
,
"Stderr"
,
]
values
=
[]
for
k
,
dic
in
result_dict
[
"results"
].
items
():
version
=
result_dict
[
"versions"
][
k
]
for
m
,
v
in
dic
.
items
():
for
(
mf
),
v
in
dic
.
items
():
m
,
_
,
f
=
mf
.
partition
(
","
)
print
(
m
,
f
)
if
m
.
endswith
(
"_stderr"
):
continue
if
m
+
"_stderr"
in
dic
:
se
=
dic
[
m
+
"_stderr"
]
values
.
append
([
k
,
version
,
m
,
"%.4f"
%
v
,
"±"
,
"%.4f"
%
se
])
if
m
+
"_stderr"
+
","
+
f
in
dic
:
se
=
dic
[
m
+
"_stderr"
+
","
+
f
]
values
.
append
([
k
,
version
,
f
,
m
,
"%.4f"
%
v
,
"±"
,
"%.4f"
%
se
])
else
:
values
.
append
([
k
,
version
,
m
,
"%.4f"
%
v
,
""
,
""
])
values
.
append
([
k
,
version
,
f
,
m
,
"%.4f"
%
v
,
""
,
""
])
k
=
""
version
=
""
md_writer
.
value_matrix
=
values
...
...
main.py
View file @
79545adb
...
...
@@ -19,7 +19,9 @@ class MultiChoice:
for
value
in
values
.
split
(
","
):
if
len
(
fnmatch
.
filter
(
self
.
choices
,
value
))
==
0
:
eval_logger
.
warning
(
"{} is not in task list."
.
format
(
value
))
# eval_logger.info(f"{choices} is this")
eval_logger
.
info
(
f
"Available tasks to choose:"
)
for
choice
in
self
.
choices
:
eval_logger
.
info
(
f
"
{
choice
}
"
)
return
True
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment