Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
224e9046
Commit
224e9046
authored
May 15, 2024
by
Israel Abebe Azime
Browse files
update on afrimgsm
parent
cb979e6c
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
84 additions
and
39 deletions
+84
-39
lm_eval/tasks/afrimgsm/gen_yaml.sh
lm_eval/tasks/afrimgsm/gen_yaml.sh
+5
-0
lm_eval/tasks/afrimgsm/gpt_direct.sh
lm_eval/tasks/afrimgsm/gpt_direct.sh
+0
-25
lm_eval/tasks/afrimgsm/utils.py
lm_eval/tasks/afrimgsm/utils.py
+79
-14
No files found.
lm_eval/tasks/afrimgsm/gen_yaml.sh
0 → 100644
View file @
224e9046
#!/bin/bash
# Generate the per-language afrimgsm task YAML files with utils.py.
#
# Flags passed to utils.py:
#   --overwrite   overwrite files if they already exist
#   --output-dir  directory to write the yaml files to
#   --mode        one of: direct, native-cot, en-cot
python utils.py --overwrite --output-dir direct --mode direct

# Alternative modes, kept disabled; enable the one you need.
# python utils.py --overwrite --output-dir en_cot --mode en-cot
# python utils.py --overwrite --output-dir native_cot --mode native-cot
lm_eval/tasks/afrimgsm/gpt_direct.sh
deleted
100755 → 0
View file @
cb979e6c
#!/bin/bash
# Evaluate the OpenAI chat models on every afrimgsm "direct" task, sweeping
# the number of few-shot examples. Results for each few-shot setting are
# written under results/<fewshot>.

models=(
    "gpt-3.5-turbo"
    "gpt-4-0125-preview"
)

# Comma-separated task list handed to lm_eval as a single --tasks argument.
task=afrimgsm_direct_amh,afrimgsm_direct_eng,afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul

for model in "${models[@]}"
do
    echo "Evaluating model: $model"
    for fewshot in 0 2 4 6 8
    do
        # One output directory per few-shot setting.
        export OUTPUT_DIR="results/$fewshot"
        mkdir -p "$OUTPUT_DIR"
        lm_eval --model openai-chat-completions \
            --model_args model="${model}" \
            --tasks "$task" \
            --output_path "$OUTPUT_DIR" \
            --num_fewshot "$fewshot" \
            --verbosity DEBUG
    done
done
\ No newline at end of file
lm_eval/tasks/afrimgsm/utils.py
View file @
224e9046
import
argparse
import
argparse
import
yaml
import
yaml
languages
=
[
'eng'
,
'amh'
,
'ibo'
,
'fra'
,
'sna'
,
'lin'
,
'wol'
,
'ewe'
,
'lug'
,
'xho'
,
'kin'
,
'twi'
,
'zul'
,
'orm'
,
'yor'
,
'hau'
,
'sot'
,
'swa'
]
languages
=
[
'eng'
,
'amh'
,
'ibo'
,
'fra'
,
'sna'
,
'lin'
,
'wol'
,
'ewe'
,
'lug'
,
'xho'
,
'kin'
,
'twi'
,
'zul'
,
'orm'
,
'yor'
,
'hau'
,
'sot'
,
'swa'
]
configs
=
{
LANGUAGES
=
{}
"QUESTION"
:
"Question:"
,
"ANSWER"
:
"Step-by-Step Answer:"
,
for
lang
in
languages
:
"DIRECT"
:
"Answer:"
,
LANGUAGES
[
lang
]
=
{
# English
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
}
"QUESTION"
:
"Question:"
,
"ANSWER"
:
"Step-by-Step Answer:"
,
"DIRECT"
:
"Answer:"
,
"REGEX"
:
"The answer is (
\\
-?[0-9
\\
.
\\
,]+)"
}
def add_regex_pattern(regex_pattern):
    """Build the ``filter_list`` config fragment for a task YAML.

    :param regex_pattern: Regex used by the ``strict-match`` filter, or
        ``None`` when no filters should be emitted.
    :return: An empty dict when ``regex_pattern`` is ``None``; otherwise a
        dict with a single ``"filter_list"`` key holding two filter
        pipelines:

        * ``strict-match``: a ``regex`` step using ``regex_pattern``,
          followed by ``take_first``.
        * ``flexible-extract``: a ``regex`` step with a fixed number-matching
          pattern and ``group_select`` of ``-1``, followed by ``take_first``.

        The result is meant to be spread (``**``) into a larger config dict.
    """
    if regex_pattern is None:
        return {}
    return {
        "filter_list": [
            {
                "name": "strict-match",
                "filter": [
                    {
                        "function": "regex",
                        # Formatted through an f-string so the stored value
                        # is always a plain string.
                        "regex_pattern": f"{regex_pattern}",
                    },
                    {
                        "function": "take_first",
                    },
                ],
            },
            {
                "name": "flexible-extract",
                "filter": [
                    {
                        "function": "regex",
                        "regex_pattern": """(-?[$0-9.,]{2,})|(-?[0-9]+)""",
                        "group_select": -1,
                    },
                    {
                        "function": "take_first",
                    },
                ],
            },
        ],
    }
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
def
gen_lang_yamls
(
output_dir
:
str
,
overwrite
:
bool
,
mode
:
str
)
->
None
:
...
@@ -18,19 +55,31 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -18,19 +55,31 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
:param overwrite: Whether to overwrite files if they already exist.
:param overwrite: Whether to overwrite files if they already exist.
"""
"""
err
=
[]
err
=
[]
for
lang
in
languages
:
for
lang
in
LANGUAGES
.
keys
()
:
try
:
try
:
QUESTION
=
LANGUAGES
[
lang
][
"QUESTION"
]
yaml_template
=
"cot_yaml"
filter_list
=
{}
DELIMITER
=
None
if
mode
==
"direct"
:
if
mode
==
"direct"
:
ANSWER
=
LANGUAGES
[
lang
][
"DIRECT"
]
REGEX
=
None
task_name
=
f
"afrimgsm_direct_
{
lang
}
"
task_name
=
f
"afrimgsm_direct_
{
lang
}
"
yaml_template
=
"
afrimgsm_common
_yaml"
yaml_template
=
"
direct
_yaml"
elif
mode
==
"native-cot"
:
elif
mode
==
"native-cot"
:
ANSWER
=
LANGUAGES
[
lang
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
lang
][
"REGEX"
]
task_name
=
f
"afrimgsm_native_cot_
{
lang
}
"
task_name
=
f
"afrimgsm_native_cot_
{
lang
}
"
yaml_template
=
"afrimgsm_common_yaml"
filter_list
=
add_regex_pattern
(
REGEX
)
DELIMITER
=
""
if
lang
in
[
"zh"
,
"ja"
]
else
None
elif
mode
==
"en-cot"
:
elif
mode
==
"en-cot"
:
ANSWER
=
LANGUAGES
[
"en"
][
"ANSWER"
]
REGEX
=
LANGUAGES
[
"en"
][
"REGEX"
]
task_name
=
f
"afrimgsm_en_cot_
{
lang
}
"
task_name
=
f
"afrimgsm_en_cot_
{
lang
}
"
yaml_template
=
"afrimgsm_common_yaml"
file_name
=
f
"
{
task_name
}
.yaml"
file_name
=
f
"
{
task_name
}
.yaml"
ANSWER_TO_SKIP
=
len
(
LANGUAGES
[
lang
][
"ANSWER"
])
+
1
with
open
(
with
open
(
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
f
"
{
output_dir
}
/
{
file_name
}
"
,
"w"
if
overwrite
else
"x"
,
encoding
=
"utf8"
)
as
f
:
)
as
f
:
...
@@ -39,7 +88,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
...
@@ -39,7 +88,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
{
{
"include"
:
yaml_template
,
"include"
:
yaml_template
,
"dataset_name"
:
lang
,
"dataset_name"
:
lang
,
"task"
:
f
"
{
task_name
}
"
"task"
:
f
"
{
task_name
}
"
,
"doc_to_text"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% else %}}"""
f
"""{{{{"
{
QUESTION
}
"+question+"
\\
n
{
ANSWER
}
"}}}}"""
f
"""{{% endif %}}"""
,
"doc_to_target"
:
f
"""{{% if answer is not none %}}"""
f
"""{{{{answer[
{
ANSWER_TO_SKIP
}
:]}}}}"""
f
"""{{% else %}}"""
f
"""{{{{answer_number|string}}}}"""
f
"""{{% endif %}}"""
,
**
filter_list
,
"generation_kwargs"
:
{
"until"
:
[
QUESTION
,
"</s>"
,
"<|im_end|>"
],
"do_sample"
:
False
,
},
**
({
"target_delimiter"
:
DELIMITER
}
if
DELIMITER
else
{}),
},
},
f
,
f
,
allow_unicode
=
True
,
allow_unicode
=
True
,
...
@@ -60,16 +125,16 @@ def main() -> None:
...
@@ -60,16 +125,16 @@ def main() -> None:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
parser
.
add_argument
(
"--overwrite"
,
"--overwrite"
,
default
=
Tru
e
,
default
=
Fals
e
,
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Overwrite files if they already exist"
,
help
=
"Overwrite files if they already exist"
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--output-dir"
,
default
=
".
/direct
"
,
help
=
"Directory to write yaml files to"
"--output-dir"
,
default
=
"."
,
help
=
"Directory to write yaml files to"
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--mode"
,
"--mode"
,
default
=
"
direc
t"
,
default
=
"
native-co
t"
,
choices
=
[
"direct"
,
"native-cot"
,
"en-cot"
],
choices
=
[
"direct"
,
"native-cot"
,
"en-cot"
],
help
=
"Mode of chain-of-thought"
,
help
=
"Mode of chain-of-thought"
,
)
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment