gaoqiong / lm-evaluation-harness

Commit a2009452
authored Sep 19, 2023 by lintangsutawika
format
parent 4578ca14
Showing 4 changed files with 20 additions and 12 deletions
lm_eval/tasks/__init__.py                   +3  -6
lm_eval/tasks/squadv2/utils.py              +15 -5
lm_eval/tasks/squadv2/with_noans_prob.yaml  +1  -1
main.py                                     +1  -0
lm_eval/tasks/__init__.py
...
@@ -37,15 +37,12 @@ def register_configurable_task(config: Dict[str, str]) -> int:
     return 0


 def register_configurable_group(config: Dict[str, str]) -> int:
     group = config["group"]
     all_task_list = config["task"]
-    config_list = [
-        task for task in all_task_list if type(task) != str
-    ]
-    task_list = [
-        task for task in all_task_list if type(task) == str
-    ]
+    config_list = [task for task in all_task_list if type(task) != str]
+    task_list = [task for task in all_task_list if type(task) == str]

     for task_config in config_list:
         var_configs = check_prompt_config(
...
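
The reflowed comprehensions split a group's "task" list by element type: plain strings are names of already-registered tasks, while anything else (in practice a dict) is an inline task config. A minimal sketch of that split, with hypothetical data (the task name and config keys below are illustrative, not taken from the harness):

    # Hypothetical mixed "task" list for a group config.
    all_task_list = [
        "squadv2",                              # registered task name (str)
        {"task": "squadv2", "num_fewshot": 2},  # inline task config (dict)
    ]

    # Same split as register_configurable_group above.
    config_list = [task for task in all_task_list if type(task) != str]
    task_list = [task for task in all_task_list if type(task) == str]

    print(config_list)  # [{'task': 'squadv2', 'num_fewshot': 2}]
    print(task_list)    # ['squadv2']

Note that isinstance(task, str) would be the more idiomatic test; the commit only reflows the lines and keeps the type(...) comparisons as they were.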
lm_eval/tasks/squadv2/utils.py
...
@@ -2,28 +2,38 @@ import re
 import string
 import collections

 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""

     def remove_articles(text):
-        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
-        return re.sub(regex, ' ', text)
+        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+        return re.sub(regex, " ", text)

     def white_space_fix(text):
-        return ' '.join(text.split())
+        return " ".join(text.split())

     def remove_punc(text):
         exclude = set(string.punctuation)
-        return ''.join(ch for ch in text if ch not in exclude)
+        return "".join(ch for ch in text if ch not in exclude)

     def lower(text):
         return text.lower()

     return white_space_fix(remove_articles(remove_punc(lower(s))))

 def get_tokens(s):
     if not s: return []
     return normalize_answer(s).split()

 # Exact match (the normalized answer exactly match the gold answer)
 def exact(predictions, references):
     return int(normalize_answer(references[0]) == normalize_answer(predictions[0]))

 # The F-score of predicted tokens versus the gold answer
 def f1(predictions, references):
     gold_toks = get_tokens(references[0])
...
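
These helpers follow the official SQuAD v2 evaluation script, and the hunk is truncated inside f1. In the standard script, token-level F1 is computed from the multiset overlap between prediction and gold tokens; a self-contained sketch of that computation, assuming the standard SQuAD logic since the rest of the body is cut off here:

    import collections

    def f1_sketch(pred_toks, gold_toks):
        # Multiset intersection: tokens common to prediction and gold.
        common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
        num_same = sum(common.values())
        if len(gold_toks) == 0 or len(pred_toks) == 0:
            # If either side is empty (e.g. a no-answer case),
            # F1 is 1 only when both are empty.
            return int(gold_toks == pred_toks)
        if num_same == 0:
            return 0
        precision = num_same / len(pred_toks)
        recall = num_same / len(gold_toks)
        return (2 * precision * recall) / (precision + recall)

    # f1_sketch(["the", "eiffel", "tower"], ["eiffel", "tower"]) -> 0.8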
lm_eval/tasks/squadv2/with_noans_prob.yaml
main.py
...
@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
 from lm_eval.logger import eval_logger, SPACING
 from lm_eval.tasks import include_task_folder
 # from lm_eval.benchmarks import include_benchmarks
+
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
...
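
Setting TOKENIZERS_PARALLELISM to "false" silences the Hugging Face tokenizers library's warning (and potential deadlock) when a process forks after the Rust tokenizer's thread pool has started, as happens with DataLoader workers. The variable must be set before any tokenizer does parallel work, hence the module-level assignment in main.py. A minimal standalone sketch (the gpt2 tokenizer is just an example, not what the harness loads):

    import os

    # Must be set before tokenizers start parallel work; otherwise forked
    # workers print "The current process just got forked ..." warnings.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")  # example model
    print(tok.tokenize("lm-evaluation-harness"))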