Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
bf11ac93
Commit
bf11ac93
authored
Mar 03, 2025
by
Baber
Browse files
Merge branch 'main' into llama
parents
83b1c564
ade01428
Changes
204
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
180 additions
and
0 deletions
+180
-0
lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml
+6
-0
lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml
+9
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml
+5
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml
+5
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml
+6
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml
+6
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml
+6
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml
+6
-0
lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml
lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml
+9
-0
lm_eval/tasks/evalita_llm/_faq_template_yaml
lm_eval/tasks/evalita_llm/_faq_template_yaml
+8
-0
lm_eval/tasks/evalita_llm/_hs_template_yaml
lm_eval/tasks/evalita_llm/_hs_template_yaml
+9
-0
lm_eval/tasks/evalita_llm/_ls_template_yaml
lm_eval/tasks/evalita_llm/_ls_template_yaml
+16
-0
lm_eval/tasks/evalita_llm/_ner_template_yaml
lm_eval/tasks/evalita_llm/_ner_template_yaml
+14
-0
lm_eval/tasks/evalita_llm/_re_template_yaml
lm_eval/tasks/evalita_llm/_re_template_yaml
+14
-0
lm_eval/tasks/evalita_llm/_sa_template_v2_yaml
lm_eval/tasks/evalita_llm/_sa_template_v2_yaml
+9
-0
lm_eval/tasks/evalita_llm/_sa_template_yaml
lm_eval/tasks/evalita_llm/_sa_template_yaml
+9
-0
lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml
lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml
+10
-0
lm_eval/tasks/evalita_llm/_sum_template_fp_yaml
lm_eval/tasks/evalita_llm/_sum_template_fp_yaml
+9
-0
lm_eval/tasks/evalita_llm/_sum_template_yaml
lm_eval/tasks/evalita_llm/_sum_template_yaml
+11
-0
lm_eval/tasks/evalita_llm/_te_template_yaml
lm_eval/tasks/evalita_llm/_te_template_yaml
+13
-0
No files found.
lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 6 for the EVALITA textual-entailment task.
# Only the keys below are set here; `include` names the shared template file.
tag: evalita-mp_te_tasks
include: _te_template_yaml
task: evalita-mp_te_prompt-6
task_alias: prompt-6
# Answer options for this prompt, written as full Italian sentences.
doc_to_choice:
  - "La frase 1 implica logicamente che la frase 2 sia vera"
  - "La frase 1 non implica logicamente che la frase 2 sia vera"
# Jinja prompt; `text1` / `text2` are the document fields it interpolates.
doc_to_text: "Devi risolvere un compito di inferenza semantica. Frase 1: '{{text1}}' Frase 2: '{{text2}}'"
lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml
0 → 100644
View file @
bf11ac93
# Group definition for the textual-entailment prompt variants.
group: evalita-mp_te
group_alias: text-entailment
task:
  # This has to match the tag in the task YAML files.
  - evalita-mp_te_tasks
# Metric reported at group level.
aggregate_metric_list:
  - metric: acc
    weight_by_size: true
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 1 for the EVALITA word-in-context task.
# Only the keys below are set here; `include` names the shared template file.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-1
task_alias: prompt-1
include: _wic_template_yaml
# The target word is sliced out of each sentence with the start/end offsets.
doc_to_text: "La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?"
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 2 for the EVALITA word-in-context task: the prompt-1 question
# preceded by a one-sentence task description.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-2
task_alias: prompt-2
include: _wic_template_yaml
# The target word is sliced out of each sentence with the start/end offsets.
doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?"
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 3 for the EVALITA word-in-context task: lettered A/B answer
# format.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-3
task_alias: prompt-3
include: _wic_template_yaml
# Double quotes are required so that `\n` is read as a newline.
# NOTE(review): whitespace around the `\n` escapes was reconstructed from a
# flattened listing -- confirm against the committed file.
doc_to_text: "La parola '{{sentence1[start1:end1]}}' nella frase '{{sentence1}}' ha lo stesso significato della parola '{{sentence2[start2:end2]}}' nella frase '{{sentence2}}'?\nA: Sì\nB: No\nRisposta:"
# "B" (No) is listed first and "A" (Sì) second; the order is kept as found.
doc_to_choice: ["B", "A"]
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 4 for the EVALITA word-in-context task: task description
# followed by the lettered A/B answer format.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-4
task_alias: prompt-4
include: _wic_template_yaml
# Double quotes are required so that `\n` is read as a newline.
# NOTE(review): whitespace around the `\n` escapes was reconstructed from a
# flattened listing -- confirm against the committed file.
doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola '{{sentence1[start1:end1]}}' nella frase '{{sentence1}}' ha lo stesso significato della parola '{{sentence2[start2:end2]}}' nella frase '{{sentence2}}'?\nA: Sì\nB: No\nRisposta:"
# "B" (No) is listed first and "A" (Sì) second; the order is kept as found.
doc_to_choice: ["B", "A"]
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 5 for the EVALITA word-in-context task: the prompt is an open
# sentence and each choice is a candidate ending for it.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-5
task_alias: prompt-5
include: _wic_template_yaml
# The target word is sliced out of each sentence with the start/end offsets.
doc_to_text: "La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'"
# The negative ending is listed first; the order is kept as found.
doc_to_choice:
  - "non hanno lo stesso significato"
  - "hanno lo stesso significato"
lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml
0 → 100644
View file @
bf11ac93
# Prompt variant 6 for the EVALITA word-in-context task: task description
# followed by an open sentence; each choice is a candidate ending for it.
tag: evalita-mp_wic_tasks
task: evalita-mp_wic_prompt-6
task_alias: prompt-6
include: _wic_template_yaml
# The target word is sliced out of each sentence with the start/end offsets.
doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'"
# The negative ending is listed first; the order is kept as found.
doc_to_choice:
  - "non hanno lo stesso significato"
  - "hanno lo stesso significato"
lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml
0 → 100644
View file @
bf11ac93
# Group definition for the word-in-context prompt variants.
group: evalita-mp_wic
group_alias: word-in-context
task:
  # This has to match the tag in the task YAML files.
  - evalita-mp_wic_tasks
# Metric reported at group level.
aggregate_metric_list:
  - metric: f1
    weight_by_size: true
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_faq_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the FAQ task: dataset, splits and a four-way multiple choice.
dataset_path: evalitahf/faq
test_split: test_1
fewshot_split: dev_1
# The target is computed by a helper in the task's `utils` module.
doc_to_target: !function utils.faq_doc_to_target
doc_to_choice: ["A", "B", "C", "D"]
output_type: multiple_choice
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_hs_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the hate-speech detection task: dataset, splits and a binary
# multiple choice.
dataset_path: evalitahf/hatespeech_detection
output_type: multiple_choice
test_split: test_all
# The `dev` split is used for both few-shot examples and validation.
fewshot_split: dev
validation_split: dev
doc_to_target: hs  # 0 = Falso, 1 = Vero
doc_to_choice: ["Falso", "Vero"]
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_ls_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the lexical-substitution task: free-form generation scored by
# task-specific helpers.
dataset_path: evalitahf/lexical_substitution
test_split: test
# The `dev` split is used for both validation and few-shot examples.
validation_split: dev
fewshot_split: dev
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "</s>"
# Target extraction and per-document scoring live in the task's `utils` module.
doc_to_target: !function utils.ls_doc_to_target
process_results: !function utils.ls_process_results
metric_list:
  - metric: f1
    higher_is_better: true
    # Aggregation is a custom function from the task's `metrics` module.
    aggregation: !function metrics._aggreg_ls
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_ner_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the entity-recognition task: free-form generation scored by
# task-specific helpers. No split keys are set in this file.
dataset_path: evalitahf/entity_recognition
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation: end-of-sequence token or a newline.
  until:
    - "</s>"
    - "\n"
# Target extraction and per-document scoring live in the task's `utils` module.
doc_to_target: !function utils.ner_doc_to_target
process_results: !function utils.ner_process_results
metric_list:
  - metric: f1
    higher_is_better: true
    # Aggregation is a custom function from the task's `metrics` module.
    aggregation: !function metrics._aggreg_ner
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_re_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the relation-extraction task: free-form generation scored by
# task-specific helpers.
dataset_path: evalitahf/relation_extraction
test_split: test
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "</s>"
# Target extraction and per-document scoring live in the task's `utils` module.
doc_to_target: !function utils.re_doc_to_target
process_results: !function utils.rel_process_results_v3
metric_list:
  - metric: f1
    higher_is_better: true
    # Aggregation is a custom function from the task's `metrics` module.
    aggregation: !function metrics._aggreg_rel
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_sa_template_v2_yaml
0 → 100644
View file @
bf11ac93
# Template (v2) for the sentiment-analysis task with a fixed four-label choice
# list.
dataset_path: evalitahf/sentiment_analysis
output_type: multiple_choice
test_split: test
fewshot_split: train
# NOTE(review): validation uses the same split as testing -- confirm intended.
validation_split: test
# The target is computed by a helper in the task's `utils` module.
doc_to_target: !function utils.sa_doc_to_target_v2
doc_to_choice: ["positivo", "negativo", "neutrale", "misto"]
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_sa_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the sentiment-analysis task; both the target and the choice list
# are produced by helpers in the task's `utils` module.
dataset_path: evalitahf/sentiment_analysis
output_type: multiple_choice
test_split: test
fewshot_split: train
# NOTE(review): validation uses the same split as testing -- confirm intended.
validation_split: test
doc_to_target: !function utils.sa_doc_to_target
doc_to_choice: !function utils.sa_doc_to_choice
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml
0 → 100644
View file @
bf11ac93
# Template for the summarization task on the `evalitahf/summarization-fp`
# dataset, evaluated on its `test_100` split.
dataset_path: evalitahf/summarization-fp
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "</s>"
test_split: test_100
fewshot_split: dev
# The reference summary is read from the document's `target` field.
doc_to_target: "{{target}}"
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_sum_template_fp_yaml
0 → 100644
View file @
bf11ac93
# Template for the summarization task on the `ARTeLab/fanpage` dataset.
# No few-shot split is set in this file.
dataset_path: ARTeLab/fanpage
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "</s>"
test_split: test
# The reference summary is read from the document's `target` field.
doc_to_target: "{{target}}"
metadata:
  # NOTE(review): sibling templates use the integer `1`; the float is kept
  # as found -- confirm whether the difference is intentional.
  version: 1.0
lm_eval/tasks/evalita_llm/_sum_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the summarization task on the `silvia-casola/WITS` dataset,
# evaluated on its `test_100` split.
dataset_path: silvia-casola/WITS
output_type: generate_until
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "</s>"
test_split: test_100
fewshot_split: dev
# Disabled alternative kept from the original file:
# test_split: train
# The reference summary is read from the document's `summary` field.
doc_to_target: "{{summary}}"
metadata:
  version: 1
lm_eval/tasks/evalita_llm/_te_template_yaml
0 → 100644
View file @
bf11ac93
# Template for the textual-entailment task: dataset, splits, binary multiple
# choice and accuracy metric.
dataset_path: evalitahf/textual_entailment
output_type: multiple_choice
test_split: test
# The `dev` split is used for both few-shot examples and validation.
fewshot_split: dev
validation_split: dev
# Maps the `entailment` field to a choice index: 'SI' -> 0, anything else -> 1.
doc_to_target: "{{ 0 if entailment == 'SI' else 1 }}"
doc_to_choice: ["Sì", "No"]
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1
Prev
1
…
3
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment