Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0c50bfaf
Commit
0c50bfaf
authored
Jan 02, 2025
by
Baber
Browse files
use chinese colon
parent
40027bca
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
14 deletions
+10
-14
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
+1
-1
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_template
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_template
+1
-8
lm_eval/tasks/llama3/instruct/mgsm/utils.py
lm_eval/tasks/llama3/instruct/mgsm/utils.py
+8
-5
No files found.
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
View file @
0c50bfaf
...
@@ -16,6 +16,6 @@ aggregate_metric_list:
...
@@ -16,6 +16,6 @@ aggregate_metric_list:
-
metric
:
exact_match
-
metric
:
exact_match
aggregation
:
mean
aggregation
:
mean
weight_by_size
:
True
weight_by_size
:
True
filter_list
:
[
flexible-extract
,
strict-match
]
filter_list
:
[
flexible-extract
]
metadata
:
metadata
:
version
:
0
version
:
0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_template
View file @
0c50bfaf
...
@@ -23,18 +23,11 @@ metric_list:
...
@@ -23,18 +23,11 @@ metric_list:
ignore_case: true
ignore_case: true
ignore_punctuation: true
ignore_punctuation: true
filter_list:
filter_list:
- name: "strict-match"
filter:
- function: "regex"
group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (\\-?[0-9\\.\\,]+)"
- function: remove_whitespace
- function: take_first
- name: "flexible-extract"
- name: "flexible-extract"
filter:
filter:
- function: regex
- function: regex
group_select: -1
group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর)
:
(-?[$0-9.,]{2,})|(-?[0-9]+)"
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর)
[::]
(-?[$0-9.,]{2,})|(-?[0-9]+)"
- function: remove_whitespace
- function: remove_whitespace
- function: take_first
- function: take_first
metadata:
metadata:
...
...
lm_eval/tasks/llama3/instruct/mgsm/utils.py
View file @
0c50bfaf
from
functools
import
partial
from
functools
import
partial
from
typing
import
List
from
typing
import
TYPE_CHECKING
,
Dict
,
List
import
datasets
if
TYPE_CHECKING
:
import
datasets
from
lm_eval.api.metrics
import
exact_match_fn
from
lm_eval.api.metrics
import
exact_match_fn
...
@@ -54,7 +56,7 @@ PROMPTS = [
...
@@ -54,7 +56,7 @@ PROMPTS = [
]
]
def
number_variations
(
n
)
:
def
number_variations
(
n
:
int
)
->
List
[
str
]
:
formats
=
[]
formats
=
[]
# Generate each pattern twice
# Generate each pattern twice
for
_
in
range
(
2
):
for
_
in
range
(
2
):
...
@@ -73,7 +75,7 @@ def number_variations(n):
...
@@ -73,7 +75,7 @@ def number_variations(n):
return
formats
return
formats
def
process_docs
(
lang
:
str
,
df
:
datasets
.
Dataset
)
->
datasets
.
Dataset
:
def
process_docs
(
lang
:
str
,
df
:
"
datasets.Dataset
"
)
->
"
datasets.Dataset
"
:
def
map_
(
doc
:
dict
):
def
map_
(
doc
:
dict
):
suffix
=
[
x
for
x
in
PROMPTS
if
x
[
"subtask_name"
]
==
lang
][
0
][
"rep"
]
suffix
=
[
x
for
x
in
PROMPTS
if
x
[
"subtask_name"
]
==
lang
][
0
][
"rep"
]
doc
[
"question"
]
=
suffix
+
"
\n\n
"
+
doc
[
"question"
].
split
(
":"
,
1
)[
-
1
]
doc
[
"question"
]
=
suffix
+
"
\n\n
"
+
doc
[
"question"
].
split
(
":"
,
1
)[
-
1
]
...
@@ -96,7 +98,7 @@ process_docs_th = partial(process_docs, "th")
...
@@ -96,7 +98,7 @@ process_docs_th = partial(process_docs, "th")
process_docs_zh
=
partial
(
process_docs
,
"zh"
)
process_docs_zh
=
partial
(
process_docs
,
"zh"
)
def
process_results
(
doc
,
prediction
)
:
def
process_results
(
doc
:
dict
,
prediction
:
List
[
str
])
->
Dict
[
str
,
int
]
:
gold
:
List
=
doc
[
"answers"
]
gold
:
List
=
doc
[
"answers"
]
return
{
return
{
"exact_match"
:
int
(
"exact_match"
:
int
(
...
@@ -104,6 +106,7 @@ def process_results(doc, prediction):
...
@@ -104,6 +106,7 @@ def process_results(doc, prediction):
predictions
=
[
x
.
strip
()
for
x
in
prediction
]
*
len
(
gold
),
predictions
=
[
x
.
strip
()
for
x
in
prediction
]
*
len
(
gold
),
references
=
gold
,
references
=
gold
,
ignore_case
=
True
,
ignore_case
=
True
,
ignore_punctuation
=
True
,
)[
"exact_match"
]
)[
"exact_match"
]
>
0
>
0
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment