Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
741a6a69
Commit
741a6a69
authored
Aug 20, 2024
by
lintangsutawika
Browse files
Merge branch 'main' of
https://github.com/EleutherAI/lm-evaluation-harness
into mela
parents
494a4515
b536f067
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
124 additions
and
74 deletions
+124
-74
lm_eval/tasks/belebele/_default_template_yaml
lm_eval/tasks/belebele/_default_template_yaml
+1
-2
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/_generate_configs.py
+33
-0
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_als_Latn.yaml
lm_eval/tasks/belebele/belebele_als_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
+5
-4
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
+5
-4
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
+5
-4
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
+5
-4
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
+5
-4
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
+5
-4
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
+5
-4
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
+5
-4
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
lm_eval/tasks/belebele/_default_template_yaml
View file @
741a6a69
group: belebele
dataset_path: facebook/belebele
fewshot_config:
sampler: first_n
output_type: multiple_choice
should_decontaminate: true
doc_to_decontamination_query: "{{question}}"
doc_to_text: "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nAnswer
:
"
doc_to_text: "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nAnswer
:
"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
metric_list:
...
...
lm_eval/tasks/belebele/_generate_configs.py
View file @
741a6a69
...
...
@@ -65,3 +65,36 @@ if __name__ == "__main__":
allow_unicode
=
True
,
default_style
=
'"'
,
)
# write group config out
group_yaml_dict
=
{
"group"
:
f
"belebele_
{
args
.
task_prefix
}
"
if
args
.
task_prefix
!=
""
else
"belebele"
,
"task"
:
[
(
f
"belebele_
{
args
.
task_prefix
}
_
{
lang
}
"
if
args
.
task_prefix
!=
""
else
f
"belebele_
{
lang
}
"
)
for
lang
in
languages
if
"default"
not
in
lang
],
"aggregate_metric_list"
:
[
{
"metric"
:
"acc"
,
"aggregation"
:
"mean"
,
"weight_by_size"
:
False
},
{
"metric"
:
"acc_norm"
,
"aggregation"
:
"mean"
,
"weight_by_size"
:
False
},
],
"metadata"
:
{
"version"
:
0.0
},
}
file_save_path
=
"_"
+
args
.
save_prefix_path
+
f
"
{
args
.
task_prefix
}
.yaml"
with
open
(
file_save_path
,
"w"
,
encoding
=
"utf-8"
)
as
group_yaml_file
:
yaml
.
dump
(
group_yaml_dict
,
group_yaml_file
,
width
=
float
(
"inf"
),
allow_unicode
=
True
,
default_style
=
'"'
,
)
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
acm_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_acm_Arab"
"
test_split"
:
"
acm_Arab"
dataset_name
:
acm_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_acm_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
afr_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_afr_Latn"
"
test_split"
:
"
afr_Latn"
dataset_name
:
afr_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_afr_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_als_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
als_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_als_Latn"
"
test_split"
:
"
als_Latn"
dataset_name
:
als_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_als_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
View file @
741a6a69
"
fewshot_split"
:
"
amh_Ethi"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_amh_Ethi"
"
test_split"
:
"
amh_Ethi"
dataset_name
:
amh_Ethi
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_amh_Ethi
test_split
:
test
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
apc_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_apc_Arab"
"
test_split"
:
"
apc_Arab"
dataset_name
:
apc_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_apc_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
arb_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Arab"
"
test_split"
:
"
arb_Arab"
dataset_name
:
arb_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_arb_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
arb_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Latn"
"
test_split"
:
"
arb_Latn"
dataset_name
:
arb_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_arb_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
ars_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ars_Arab"
"
test_split"
:
"
ars_Arab"
dataset_name
:
ars_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_ars_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
ary_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ary_Arab"
"
test_split"
:
"
ary_Arab"
dataset_name
:
ary_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_ary_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
View file @
741a6a69
"
fewshot_split"
:
"
arz_Arab"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arz_Arab"
"
test_split"
:
"
arz_Arab"
dataset_name
:
arz_Arab
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_arz_Arab
test_split
:
test
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
View file @
741a6a69
"
fewshot_split"
:
"
asm_Beng"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_asm_Beng"
"
test_split"
:
"
asm_Beng"
dataset_name
:
asm_Beng
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_asm_Beng
test_split
:
test
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
azj_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_azj_Latn"
"
test_split"
:
"
azj_Latn"
dataset_name
:
azj_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_azj_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
bam_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bam_Latn"
"
test_split"
:
"
bam_Latn"
dataset_name
:
bam_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_bam_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
View file @
741a6a69
"
fewshot_split"
:
"
ben_Beng"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Beng"
"
test_split"
:
"
ben_Beng"
dataset_name
:
ben_Beng
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_ben_Beng
test_split
:
test
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
ben_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Latn"
"
test_split"
:
"
ben_Latn"
dataset_name
:
ben_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_ben_Latn
test_split
:
test
lm_eval/tasks/belebele/belebele_bod_Tibt.yaml
View file @
741a6a69
"
fewshot_split"
:
"
bod_Tibt"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bod_Tibt"
"
test_split"
:
"
bod_Tibt"
dataset_name
:
bod_Tibt
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_bod_Tibt
test_split
:
test
lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml
View file @
741a6a69
"
fewshot_split"
:
"
bul_Cyrl"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bul_Cyrl"
"
test_split"
:
"
bul_Cyrl"
dataset_name
:
bul_Cyrl
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_bul_Cyrl
test_split
:
test
lm_eval/tasks/belebele/belebele_cat_Latn.yaml
View file @
741a6a69
"
fewshot_split"
:
"
cat_Latn"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_cat_Latn"
"
test_split"
:
"
cat_Latn"
dataset_name
:
cat_Latn
fewshot_split
:
test
include
:
_default_template_yaml
task
:
belebele_cat_Latn
test_split
:
test
Prev
1
…
15
16
17
18
19
20
21
22
23
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment