gaoqiong / lm-evaluation-harness / Commits

Commit f555a583, authored Aug 30, 2023 by lintangsutawika

fix formatting

Parent: 64d4600c

Showing 1 changed file with 5 additions and 6 deletions.

lm_eval/tasks/coqa/utils.py (+5, -6)
@@ -2,6 +2,7 @@ from itertools import zip_longest
import transformers.data.metrics.squad_metrics as squad_metrics


def doc_to_text(doc):
    # Given a passage p, the conversation history {q1, a1, . . . qi−1, ai−1}
    # and a question qi, the task is to predict the answer ai
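The comments above restate the CoQA task setup. As a rough sketch of how such a prompt could be assembled (illustrative only, not part of this commit; the field names assume the HuggingFace CoQA schema with "story", "questions", and "answers"):

# Illustrative sketch only, not this commit's code.
from itertools import zip_longest

def build_prompt(doc):
    # Passage first, then the running Q/A history.
    text = doc["story"] + "\n\n"
    # The last question is left unanswered in the prompt: the answers are
    # truncated by one, and zip_longest pads the final pair with a = None.
    for q, a in zip_longest(
        doc["questions"]["input_text"], doc["answers"]["input_text"][:-1]
    ):
        text += f"Q: {q}\n\n"
        text += f"A: {a}\n\n" if a is not None else "A:"
    return text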
@@ -41,14 +42,13 @@ def em(gold_list, pred):
        for i in range(len(gold_list)):
            gold_answers = gold_list[0:i] + gold_list[i + 1 :]
            # predictions compared against (n) golds and take maximum
            em_sum += max(squad_metrics.compute_exact(a, pred) for a in gold_answers)
    else:
        em_sum += max(squad_metrics.compute_exact(a, pred) for a in gold_list)

    return em_sum / max(1, len(gold_list))


def compute_scores(gold_list, pred):
    # tests for exact match and on the normalised answer (compute_exact)
    # test for overlap (compute_f1)
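The loop in em() implements the official CoQA leave-one-out scheme: when a question has n > 1 human answers, the prediction is scored against each subset of n - 1 answers and the per-fold maxima are averaged, so a single idiosyncratic reference cannot dominate the score. A minimal self-contained sketch of that loop, with made-up answers:

import transformers.data.metrics.squad_metrics as squad_metrics

# Made-up example: three human answers, one of which the model matched.
gold_list = ["a dog", "a cat", "a dog"]
pred = "a cat"

em_sum = 0.0
for i in range(len(gold_list)):
    # Score the prediction against every gold except the i-th.
    others = gold_list[0:i] + gold_list[i + 1 :]
    em_sum += max(squad_metrics.compute_exact(a, pred) for a in others)

print(em_sum / max(1, len(gold_list)))  # 2/3: the fold that holds out "a cat" scores 0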
@@ -58,9 +58,7 @@ def compute_scores(gold_list, pred):
        for i in range(len(gold_list)):
            gold_answers = gold_list[0:i] + gold_list[i + 1 :]
            # predictions compared against (n) golds and take maximum
            em_sum += max(squad_metrics.compute_exact(a, pred) for a in gold_answers)
            f1_sum += max(squad_metrics.compute_f1(a, pred) for a in gold_answers)
    else:
        em_sum += max(squad_metrics.compute_exact(a, pred) for a in gold_list)
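The two squad_metrics helpers used here behave differently: compute_exact is a binary match on normalised strings (lowercased, with punctuation and articles stripped), while compute_f1 gives partial credit for token overlap. For example:

import transformers.data.metrics.squad_metrics as squad_metrics

print(squad_metrics.compute_exact("the brown dog", "brown dog"))  # 1: articles are stripped
print(squad_metrics.compute_exact("brown dog", "brown cat"))      # 0: no exact match
print(squad_metrics.compute_f1("brown dog", "brown cat"))         # 0.5: one of two tokens overlaps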
@@ -71,6 +69,7 @@ def compute_scores(gold_list, pred):
        "f1": f1_sum / max(1, len(gold_list)),
    }


def process_results(doc, results):
    gold_list = doc_to_target(doc)
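Putting the pieces together, a hypothetical call mirroring what process_results() does for a single document:

# Made-up gold answers and prediction for one question.
gold_list = ["in the garden", "in the garden", "the garden"]
pred = "in the garden"

scores = compute_scores(gold_list, pred)
print(scores)  # {'em': 1.0, 'f1': 1.0}: every leave-one-out fold
               # still contains an exact copy of the prediction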