Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
6884c5a0
Commit
6884c5a0
authored
Jan 02, 2025
by
Baber
Browse files
add mgsm
parent
b0108cf8
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
65 additions
and
44 deletions
+65
-44
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
+19
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_bn.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_bn.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_de.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_de.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_en.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_en.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_es.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_es.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_fr.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_fr.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ja.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ja.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ru.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ru.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_sw.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_sw.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_te.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_te.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_th.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_th.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_zh.yaml
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_zh.yaml
+4
-0
lm_eval/tasks/llama3/instruct/mgsm/utils.py
lm_eval/tasks/llama3/instruct/mgsm/utils.py
+2
-2
lm_eval/tasks/llama3/instruct/mgsm_chat.yaml
lm_eval/tasks/llama3/instruct/mgsm_chat.yaml
+0
-42
No files found.
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat.yaml
0 → 100644
View file @
6884c5a0
# Group config aggregating the per-language MGSM chat-format tasks.
group: mgsm_chat
# Fixed: alias previously read "mmlu (llama)" — a copy-paste from the MMLU
# group config; this group evaluates MGSM, not MMLU.
group_alias: mgsm (llama)
task:
  - mgsm_chat_bn
  - mgsm_chat_de
  - mgsm_chat_en
  - mgsm_chat_es
  - mgsm_chat_fr
  - mgsm_chat_ja
  - mgsm_chat_ru
  - mgsm_chat_sw
  - mgsm_chat_te
  - mgsm_chat_th
  - mgsm_chat_zh
aggregate_metric_list:
  - metric: exact_match
    # Weight the group-level aggregate by each subtask's dataset size.
    weight_by_size: True
metadata:
  version: 0
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_bn.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Bengali (bn) split.
include: mgsm_chat_template
dataset_name: bn
# Overridden by language-specific config.
process_docs: !function utils.process_docs_bn
task: mgsm_chat_bn
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_de.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, German (de) split.
include: mgsm_chat_template
dataset_name: de
# Overridden by language-specific config.
process_docs: !function utils.process_docs_de
task: mgsm_chat_de
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_en.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, English (en) split.
include: mgsm_chat_template
dataset_name: en
# Overridden by language-specific config.
process_docs: !function utils.process_docs_en
task: mgsm_chat_en
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_es.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Spanish (es) split.
include: mgsm_chat_template
dataset_name: es
# Overridden by language-specific config.
process_docs: !function utils.process_docs_es
task: mgsm_chat_es
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_fr.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, French (fr) split.
include: mgsm_chat_template
dataset_name: fr
# Overridden by language-specific config.
process_docs: !function utils.process_docs_fr
task: mgsm_chat_fr
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ja.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Japanese (ja) split.
include: mgsm_chat_template
dataset_name: ja
# Overridden by language-specific config.
process_docs: !function utils.process_docs_ja
task: mgsm_chat_ja
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_ru.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Russian (ru) split.
include: mgsm_chat_template
dataset_name: ru
# Overridden by language-specific config.
process_docs: !function utils.process_docs_ru
task: mgsm_chat_ru
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_sw.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Swahili (sw) split.
include: mgsm_chat_template
dataset_name: sw
# Overridden by language-specific config.
process_docs: !function utils.process_docs_sw
task: mgsm_chat_sw
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_te.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Telugu (te) split.
include: mgsm_chat_template
dataset_name: te
# Overridden by language-specific config.
process_docs: !function utils.process_docs_te
task: mgsm_chat_te
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_th.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Thai (th) split.
include: mgsm_chat_template
dataset_name: th
# Overridden by language-specific config.
process_docs: !function utils.process_docs_th
task: mgsm_chat_th
lm_eval/tasks/llama3/instruct/mgsm/mgsm_chat_zh.yaml
0 → 100644
View file @
6884c5a0
# MGSM chat-format evaluation, Chinese (zh) split.
include: mgsm_chat_template
dataset_name: zh
# Overridden by language-specific config.
process_docs: !function utils.process_docs_zh
task: mgsm_chat_zh
lm_eval/tasks/llama3/instruct/mgsm/utils.py
View file @
6884c5a0
...
@@ -76,7 +76,7 @@ def number_variations(n):
...
@@ -76,7 +76,7 @@ def number_variations(n):
def
process_docs
(
lang
:
str
,
df
:
datasets
.
Dataset
)
->
datasets
.
Dataset
:
def
process_docs
(
lang
:
str
,
df
:
datasets
.
Dataset
)
->
datasets
.
Dataset
:
def
map_
(
doc
:
dict
):
def
map_
(
doc
:
dict
):
suffix
=
[
x
for
x
in
PROMPTS
if
x
[
"subtask_name"
]
==
lang
][
0
][
"rep"
]
suffix
=
[
x
for
x
in
PROMPTS
if
x
[
"subtask_name"
]
==
lang
][
0
][
"rep"
]
doc
[
"question"
]
=
suffix
+
r
"\n\n"
+
doc
[
"question"
].
split
(
":"
,
1
)[
-
1
]
doc
[
"question"
]
=
suffix
+
"
\n\n
"
+
doc
[
"question"
].
split
(
":"
,
1
)[
-
1
]
doc
[
"answers"
]
=
number_variations
(
doc
[
"answer_number"
])
doc
[
"answers"
]
=
number_variations
(
doc
[
"answer_number"
])
return
doc
return
doc
...
@@ -97,7 +97,7 @@ process_docs_zh = partial(process_docs, "zh")
...
@@ -97,7 +97,7 @@ process_docs_zh = partial(process_docs, "zh")
def
process_results
(
doc
,
prediction
):
def
process_results
(
doc
,
prediction
):
gold
:
List
=
doc
[
"
input_correct_response
s"
]
gold
:
List
=
doc
[
"
answer
s"
]
return
{
return
{
"exact_match"
:
int
(
"exact_match"
:
int
(
exact_match_fn
(
exact_match_fn
(
...
...
lm_eval/tasks/llama3/instruct/mgsm_chat.yaml
deleted
100644 → 0
View file @
b0108cf8
tag
:
llama3
task
:
mgsm_chat
dataset_path
:
meta-llama/Llama-3.2-3B-Instruct-evals
dataset_name
:
Llama-3.2-3B-Instruct-evals__mgsm__details
output_type
:
generate_until
test_split
:
latest
doc_to_text
:
"
{{
input_final_prompts
|first
|replace('<|start_header_id|>user<|end_header_id|>',
'')
|replace('<|eot_id|><|start_header_id|>assistant<|end_header_id|>',
'')
|trim
}}"
doc_to_target
:
"
input_correct_responses"
process_results
:
!function
utils.process_results_mgsm
generation_kwargs
:
until
:
[]
do_sample
:
false
temperature
:
0.0
max_gen_toks
:
2048
metric_list
:
-
metric
:
exact_match
aggregation
:
mean
higher_is_better
:
true
ignore_case
:
true
ignore_punctuation
:
true
filter_list
:
-
name
:
"
strict-match"
filter
:
-
function
:
"
regex"
regex_pattern
:
"
(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর):
(
\\
-?[0-9
\\
.
\\
,]+)"
-
function
:
remove_whitespace
-
function
:
take_first
-
name
:
"
flexible-extract"
filter
:
-
function
:
regex
group_select
:
-1
regex_pattern
:
"
(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর):
(-?[$0-9.,]{2,})|(-?[0-9]+)"
-
function
:
remove_whitespace
-
function
:
take_first
metadata
:
version
:
0.0
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment