Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
de71ad92
Unverified
Commit
de71ad92
authored
Oct 17, 2023
by
Lintang Sutawika
Committed by
GitHub
Oct 17, 2023
Browse files
Merge branch 'big-refactor' into fix-unittests
parents
09d20bfa
73c80915
Changes
370
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
121 additions
and
0 deletions
+121
-0
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/_generate_configs.py
+64
-0
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_als_Latn.yaml
lm_eval/tasks/belebele/belebele_als_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
+3
-0
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
+3
-0
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
+3
-0
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
+3
-0
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
+3
-0
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
+3
-0
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
+3
-0
lm_eval/tasks/belebele/belebele_ceb_Latn.yaml
lm_eval/tasks/belebele/belebele_ceb_Latn.yaml
+3
-0
No files found.
lm_eval/tasks/belebele/_generate_configs.py
0 → 100644
View file @
de71ad92
"""
Take a base YAML config and generate one task YAML per Belebele language subset, each of which includes the base YAML.
"""
import
os
import
yaml
import
argparse
import
requests
from
tqdm
import
tqdm
from
lm_eval.logger
import
eval_logger
# Hugging Face datasets-server endpoint queried for the "splits" of the facebook/belebele dataset.
API_URL
=
"https://datasets-server.huggingface.co/splits?dataset=facebook/belebele"
def parse_args(argv=None):
    """Parse the command-line options of the belebele config generator.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            the no-argument call has always done.

    Returns:
        argparse.Namespace with the attributes ``base_yaml_path``
        (required), ``save_prefix_path`` (default ``"belebele"``),
        ``cot_prompt_path`` (default ``None``) and ``task_prefix``
        (default ``""``).
    """
    parser = argparse.ArgumentParser(
        description="Generate one task YAML per belebele language subset."
    )
    parser.add_argument(
        "--base_yaml_path",
        required=True,
        help="Path to the base YAML; its file name becomes the 'include' "
        "entry of every generated YAML.",
    )
    parser.add_argument(
        "--save_prefix_path",
        default="belebele",
        help="Output path prefix; each file is written to "
        "<prefix>_<lang>.yaml.",
    )
    parser.add_argument(
        "--cot_prompt_path",
        default=None,
        help="Optional path to a JSON file that is loaded when given.",
    )
    parser.add_argument(
        "--task_prefix",
        default="",
        help="Optional infix for task names: belebele_<prefix>_<lang> "
        "instead of belebele_<lang>.",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: ask the datasets-server API which configs the
    # belebele dataset has and write one small YAML per config that
    # includes the base YAML.
    args = parse_args()

    # Upper bound (seconds) for the API request so that a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT_SECONDS = 30

    # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
    base_yaml_name = os.path.basename(args.base_yaml_path)

    # The parsed content is not used further down; loading it still makes
    # the script fail fast when the base YAML is missing or malformed.
    # NOTE(review): yaml.full_load can build more than plain data types;
    # consider yaml.safe_load if the base YAML never needs custom tags.
    with open(args.base_yaml_path, encoding="utf-8") as f:
        base_yaml = yaml.full_load(f)

    if args.cot_prompt_path is not None:
        import json

        # As above: loaded (and thereby validated) but not used below.
        with open(args.cot_prompt_path, encoding="utf-8") as f:
            cot_file = json.load(f)

    def query():
        """Return the ``splits`` list reported by the API at ``API_URL``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            requests.Timeout: if no answer arrives within the timeout.
        """
        response = requests.get(API_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        # Surface HTTP errors directly instead of as a confusing JSON or
        # KeyError failure on the error payload.
        response.raise_for_status()
        return response.json()["splits"]

    # A config that has several splits appears once per split in the
    # response; de-duplicate (keeping first-seen order) so each YAML is
    # generated exactly once.
    languages = list(dict.fromkeys(split["config"] for split in query()))

    for lang in tqdm(languages):
        if args.task_prefix != "":
            task_name = f"belebele_{args.task_prefix}_{lang}"
        else:
            task_name = f"belebele_{lang}"

        yaml_dict = {
            "include": base_yaml_name,
            "task": task_name,
            "dataset_name": lang,
        }

        file_save_path = args.save_prefix_path + f"_{lang}.yaml"
        eval_logger.info(f"Saving yaml for subset {lang} to {file_save_path}")
        # allow_unicode=True writes non-ASCII characters unescaped, so the
        # file must be opened as UTF-8 rather than the platform default.
        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
            yaml.dump(
                yaml_dict,
                yaml_file,
                width=float("inf"),
                allow_unicode=True,
                default_style='"',
            )
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
acm_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_acm_Arab"
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
afr_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_afr_Latn"
lm_eval/tasks/belebele/belebele_als_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
als_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_als_Latn"
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
amh_Ethi"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_amh_Ethi"
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
apc_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_apc_Arab"
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
arb_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Arab"
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
arb_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Latn"
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
ars_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ars_Arab"
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
ary_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ary_Arab"
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
arz_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arz_Arab"
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
asm_Beng"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_asm_Beng"
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
azj_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_azj_Latn"
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
bam_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bam_Latn"
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
ben_Beng"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Beng"
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
ben_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Latn"
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
bod_Tibt"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bod_Tibt"
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
bul_Cyrl"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bul_Cyrl"
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
cat_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_cat_Latn"
lm_eval/tasks/belebele/belebele_ceb_Latn.yaml
0 → 100644
View file @
de71ad92
"
dataset_name"
:
"
ceb_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ceb_Latn"
Prev
1
2
3
4
5
6
…
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment