gaoqiong / lm-evaluation-harness

Commit eeec6dae (unverified)
make RougeScorer only initialized once (#2090)
Authored Jul 12, 2024 by Hailey Schoelkopf; committed via GitHub on Jul 12, 2024
Parent: a0243d54
Showing 2 changed files with 17 additions and 2 deletions:

  lm_eval/tasks/tinyBenchmarks/utils_truthfulqa.py   +7 / -1
  lm_eval/tasks/truthfulqa/utils.py                  +10 / -1
lm_eval/tasks/tinyBenchmarks/utils_truthfulqa.py

@@ -6,6 +6,8 @@ from rouge_score import rouge_scorer, scoring
 """ This code mirrors the utils of the original truthful_qa task """
 
+ROUGE_SCORER = None
+
 
 def process_results_mc2(doc, results):
     lls, is_greedy = zip(*results)
@@ -153,7 +155,11 @@ def rouge(refs, preds):
         A `list` of predicted `strs`.
     """
     rouge_types = ["rouge1", "rouge2", "rougeLsum"]
-    scorer = rouge_scorer.RougeScorer(rouge_types)
+    global ROUGE_SCORER
+    if ROUGE_SCORER is None:
+        # init RougeScorer once (https://github.com/EleutherAI/lm-evaluation-harness/issues/1692)--rouge_types are constant
+        ROUGE_SCORER = rouge_scorer.RougeScorer(rouge_types)
+    scorer = ROUGE_SCORER
 
     # Add newlines between sentences to correctly compute `rougeLsum`.
     def _prepare_summary(summary):
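The change replaces a per-call `rouge_scorer.RougeScorer(rouge_types)` construction inside `rouge()` with a lazily initialized module-level instance: since `rouge_types` never varies, one scorer can be built on the first call and reused for every subsequent reference/prediction pair. A minimal standalone sketch of the same lazy-initialization pattern, with a hypothetical `get_rouge_scorer()` helper that the commit itself does not add:

from rouge_score import rouge_scorer

ROUGE_SCORER = None  # module-level cache, populated on first use


def get_rouge_scorer():
    """Return a shared RougeScorer, constructing it only on the first call."""
    global ROUGE_SCORER
    if ROUGE_SCORER is None:
        # The score types are constant, so a single instance can serve every call.
        ROUGE_SCORER = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeLsum"])
    return ROUGE_SCORER

The commit inlines this check at the one call site rather than adding a helper, which keeps the diff small and leaves the rest of `rouge()` untouched.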
lm_eval/tasks/truthfulqa/utils.py

@@ -4,6 +4,9 @@ import sacrebleu
 from rouge_score import rouge_scorer, scoring
 
+ROUGE_SCORER = None
+
+
 def process_results_mc2(doc, results):
     lls, is_greedy = zip(*results)
@@ -149,8 +152,14 @@ def rouge(refs, preds):
     :param preds:
         A `list` of predicted `strs`.
     """
     rouge_types = ["rouge1", "rouge2", "rougeLsum"]
-    scorer = rouge_scorer.RougeScorer(rouge_types)
+    global ROUGE_SCORER
+    if ROUGE_SCORER is None:
+        # init RougeScorer once (https://github.com/EleutherAI/lm-evaluation-harness/issues/1692)--rouge_types are constant
+        ROUGE_SCORER = rouge_scorer.RougeScorer(rouge_types)
+
+    scorer = ROUGE_SCORER
+
     # Add newlines between sentences to correctly compute `rougeLsum`.
     def _prepare_summary(summary):
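For comparison only (the commit uses the explicit `global` shown above, not this), the same once-only construction could be expressed with `functools.lru_cache`; the `_shared_rouge_scorer` name here is illustrative:

import functools

from rouge_score import rouge_scorer


@functools.lru_cache(maxsize=1)
def _shared_rouge_scorer():
    # lru_cache memoizes the returned instance, so construction happens
    # once per process; the arguments are fixed, so maxsize=1 suffices.
    return rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeLsum"])

Neither variant guarantees exactly-once construction under concurrent first calls, but a duplicate `RougeScorer` is merely wasted work, so the simple `is None` check appears sufficient here.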