Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0d1ef037
Commit
0d1ef037
authored
Jan 17, 2024
by
lintangsutawika
Browse files
solved merge conflict
parents
aa44be3f
ada4a31d
Changes
424
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
40 additions
and
27 deletions
+40
-27
lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
+1
-1
lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
+1
-1
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
+1
-1
lm_eval/tasks/belebele/_default_template_yaml
lm_eval/tasks/belebele/_default_template_yaml
+1
-3
lm_eval/tasks/belebele/_generate_configs.py
lm_eval/tasks/belebele/_generate_configs.py
+6
-6
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
+2
-1
lm_eval/tasks/belebele/belebele_als_Latn.yaml
lm_eval/tasks/belebele/belebele_als_Latn.yaml
+2
-1
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
+2
-1
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
+2
-1
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
+2
-1
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
+2
-1
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
+2
-1
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
+2
-1
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
+2
-1
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
+2
-1
No files found.
lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml
View file @
0d1ef037
...
@@ -24,4 +24,4 @@ filter_list:
...
@@ -24,4 +24,4 @@ filter_list:
- function: "take_first"
- function: "take_first"
num_fewshot: 0
num_fewshot: 0
metadata:
metadata:
-
version: 0
version:
1.
0
lm_eval/tasks/bbh/fewshot/_fewshot_template_yaml
View file @
0d1ef037
...
@@ -18,4 +18,4 @@ generation_kwargs:
...
@@ -18,4 +18,4 @@ generation_kwargs:
temperature: 0.0
temperature: 0.0
num_fewshot: 0
num_fewshot: 0
metadata:
metadata:
-
version: 0
version:
1.
0
lm_eval/tasks/bbh/zeroshot/_zeroshot_template_yaml
View file @
0d1ef037
...
@@ -18,4 +18,4 @@ generation_kwargs:
...
@@ -18,4 +18,4 @@ generation_kwargs:
temperature: 0.0
temperature: 0.0
num_fewshot: 0
num_fewshot: 0
metadata:
metadata:
-
version: 0
version:
1.
0
lm_eval/tasks/belebele/_default_template_yaml
View file @
0d1ef037
group: belebele
group: belebele
dataset_path: facebook/belebele
dataset_path: facebook/belebele
test_split: test
fewshot_split: test
fewshot_config:
fewshot_config:
sampler: first_n
sampler: first_n
output_type: multiple_choice
output_type: multiple_choice
...
@@ -18,4 +16,4 @@ metric_list:
...
@@ -18,4 +16,4 @@ metric_list:
aggregation: mean
aggregation: mean
higher_is_better: true
higher_is_better: true
metadata:
metadata:
-
version: 0.0
version: 0.0
lm_eval/tasks/belebele/_generate_configs.py
View file @
0d1ef037
...
@@ -8,7 +8,7 @@ import requests
...
@@ -8,7 +8,7 @@ import requests
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lm_eval.
logger
import
eval_
logg
er
from
lm_eval.
utils
import
logg
ing
API_URL
=
"https://datasets-server.huggingface.co/splits?dataset=facebook/belebele"
API_URL
=
"https://datasets-server.huggingface.co/splits?dataset=facebook/belebele"
...
@@ -23,7 +23,6 @@ def parse_args():
...
@@ -23,7 +23,6 @@ def parse_args():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
args
=
parse_args
()
args
=
parse_args
()
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
# get filename of base_yaml so we can `"include": ` it in our other YAMLs.
...
@@ -40,8 +39,8 @@ if __name__ == "__main__":
...
@@ -40,8 +39,8 @@ if __name__ == "__main__":
def
query
():
def
query
():
response
=
requests
.
get
(
API_URL
)
response
=
requests
.
get
(
API_URL
)
return
response
.
json
()[
"splits"
]
return
response
.
json
()[
"splits"
]
print
(
query
())
languages
=
[
split
[
"
config
"
]
for
split
in
query
()]
languages
=
[
split
[
"
split
"
]
for
split
in
query
()]
for
lang
in
tqdm
(
languages
):
for
lang
in
tqdm
(
languages
):
yaml_dict
=
{
yaml_dict
=
{
...
@@ -49,11 +48,12 @@ if __name__ == "__main__":
...
@@ -49,11 +48,12 @@ if __name__ == "__main__":
"task"
:
f
"belebele_
{
args
.
task_prefix
}
_
{
lang
}
"
"task"
:
f
"belebele_
{
args
.
task_prefix
}
_
{
lang
}
"
if
args
.
task_prefix
!=
""
if
args
.
task_prefix
!=
""
else
f
"belebele_
{
lang
}
"
,
else
f
"belebele_
{
lang
}
"
,
"dataset_name"
:
lang
,
"test_split"
:
lang
,
"fewshot_split"
:
lang
,
}
}
file_save_path
=
args
.
save_prefix_path
+
f
"_
{
lang
}
.yaml"
file_save_path
=
args
.
save_prefix_path
+
f
"_
{
lang
}
.yaml"
eval_
logg
er
.
info
(
f
"Saving yaml for subset
{
lang
}
to
{
file_save_path
}
"
)
logg
ing
.
info
(
f
"Saving yaml for subset
{
lang
}
to
{
file_save_path
}
"
)
with
open
(
file_save_path
,
"w"
)
as
yaml_file
:
with
open
(
file_save_path
,
"w"
)
as
yaml_file
:
yaml
.
dump
(
yaml
.
dump
(
yaml_dict
,
yaml_dict
,
...
...
lm_eval/tasks/belebele/belebele_acm_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
acm_Arab"
"
fewshot_split
"
:
"
acm_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_acm_Arab"
"
task"
:
"
belebele_acm_Arab"
"
test_split"
:
"
acm_Arab"
lm_eval/tasks/belebele/belebele_afr_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
afr_Latn"
"
fewshot_split
"
:
"
afr_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_afr_Latn"
"
task"
:
"
belebele_afr_Latn"
"
test_split"
:
"
afr_Latn"
lm_eval/tasks/belebele/belebele_als_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
als_Latn"
"
fewshot_split
"
:
"
als_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_als_Latn"
"
task"
:
"
belebele_als_Latn"
"
test_split"
:
"
als_Latn"
lm_eval/tasks/belebele/belebele_amh_Ethi.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
amh_Ethi"
"
fewshot_split
"
:
"
amh_Ethi"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_amh_Ethi"
"
task"
:
"
belebele_amh_Ethi"
"
test_split"
:
"
amh_Ethi"
lm_eval/tasks/belebele/belebele_apc_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
apc_Arab"
"
fewshot_split
"
:
"
apc_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_apc_Arab"
"
task"
:
"
belebele_apc_Arab"
"
test_split"
:
"
apc_Arab"
lm_eval/tasks/belebele/belebele_arb_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
arb_Arab"
"
fewshot_split
"
:
"
arb_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Arab"
"
task"
:
"
belebele_arb_Arab"
"
test_split"
:
"
arb_Arab"
lm_eval/tasks/belebele/belebele_arb_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
arb_Latn"
"
fewshot_split
"
:
"
arb_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arb_Latn"
"
task"
:
"
belebele_arb_Latn"
"
test_split"
:
"
arb_Latn"
lm_eval/tasks/belebele/belebele_ars_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
ars_Arab"
"
fewshot_split
"
:
"
ars_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ars_Arab"
"
task"
:
"
belebele_ars_Arab"
"
test_split"
:
"
ars_Arab"
lm_eval/tasks/belebele/belebele_ary_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
ary_Arab"
"
fewshot_split
"
:
"
ary_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ary_Arab"
"
task"
:
"
belebele_ary_Arab"
"
test_split"
:
"
ary_Arab"
lm_eval/tasks/belebele/belebele_arz_Arab.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
arz_Arab"
"
fewshot_split
"
:
"
arz_Arab"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_arz_Arab"
"
task"
:
"
belebele_arz_Arab"
"
test_split"
:
"
arz_Arab"
lm_eval/tasks/belebele/belebele_asm_Beng.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
asm_Beng"
"
fewshot_split
"
:
"
asm_Beng"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_asm_Beng"
"
task"
:
"
belebele_asm_Beng"
"
test_split"
:
"
asm_Beng"
lm_eval/tasks/belebele/belebele_azj_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
azj_Latn"
"
fewshot_split
"
:
"
azj_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_azj_Latn"
"
task"
:
"
belebele_azj_Latn"
"
test_split"
:
"
azj_Latn"
lm_eval/tasks/belebele/belebele_bam_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
bam_Latn"
"
fewshot_split
"
:
"
bam_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_bam_Latn"
"
task"
:
"
belebele_bam_Latn"
"
test_split"
:
"
bam_Latn"
lm_eval/tasks/belebele/belebele_ben_Beng.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
ben_Beng"
"
fewshot_split
"
:
"
ben_Beng"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Beng"
"
task"
:
"
belebele_ben_Beng"
"
test_split"
:
"
ben_Beng"
lm_eval/tasks/belebele/belebele_ben_Latn.yaml
View file @
0d1ef037
"
dataset_name
"
:
"
ben_Latn"
"
fewshot_split
"
:
"
ben_Latn"
"
include"
:
"
_default_template_yaml"
"
include"
:
"
_default_template_yaml"
"
task"
:
"
belebele_ben_Latn"
"
task"
:
"
belebele_ben_Latn"
"
test_split"
:
"
ben_Latn"
Prev
1
2
3
4
5
6
7
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment