Commit 173b2bc3 authored by Baber
Browse files

Merge branch 'main' into humaneval

# Conflicts:
#	lm_eval/api/task.py
parents 74344829 bb098f13
# File generated by `create-yamls.py`
# FLORES translation task, Basque -> Spanish. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_eu-es
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nSpanish sentence:"
doc_to_target: '{{sentence_spa_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Basque -> French. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_eu-fr
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nFrench sentence:"
doc_to_target: '{{sentence_fra_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Basque -> Galician. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_eu-gl
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nGalician sentence:"
doc_to_target: '{{sentence_glg_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Basque -> Italian. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_eu-it
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nItalian sentence:"
doc_to_target: '{{sentence_ita_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Basque -> Portuguese. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_eu-pt
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nPortuguese sentence:"
doc_to_target: '{{sentence_por_Latn}}'
# Group definition collecting every Basque <-> X FLORES translation task
# (both directions for es, en, pt, it, fr, ca, gl, de).
group: flores_eu
task:
  - flores_es-eu
  - flores_eu-es
  - flores_en-eu
  - flores_eu-en
  - flores_eu-pt
  - flores_pt-eu
  - flores_eu-it
  - flores_it-eu
  - flores_eu-fr
  - flores_fr-eu
  - flores_eu-ca
  - flores_ca-eu
  - flores_eu-gl
  - flores_gl-eu
  - flores_eu-de
  - flores_de-eu
# `aggregation` and `weight_by_size` are attributes of the bleu entry, so they
# must be indented under it; at column 0 they would become unrelated
# top-level keys.
aggregate_metric_list:
  - metric: bleu
    aggregation: mean
    weight_by_size: false
metadata:
  version: 1.0
# File generated by `create-yamls.py`
# FLORES translation task, French -> Basque. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_fr-eu
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "French sentence: {{sentence_fra_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Galician -> Basque. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_gl-eu
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Galician sentence: {{sentence_glg_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Italian -> Basque. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_it-eu
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Italian sentence: {{sentence_ita_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task, Portuguese -> Basque. Settings not listed here are
# expected to come from the included `_flores_common_yaml`.
include: _flores_common_yaml
task: flores_pt-eu
# Double-quoted with an explicit \n: a bare line break inside a quoted scalar
# is folded into a single space, which would put both labels on one line.
doc_to_text: "Portuguese sentence: {{sentence_por_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# Basque MGSM with a step-by-step ("urratsez urrats") prompt. The final number
# is pulled from the generation by the `get-answer` filter and scored with
# exact_match.
task: mgsm_native_cot_eu
dataset_path: HiTZ/MGSM-eu
dataset_name: null
# When `answer` is present the first 27 characters are dropped
# (presumably the "Erantzuna urratsez urrats: " prefix - verify against the
# dataset); otherwise the bare number is the target.
doc_to_target: '{% if answer is not none %}{{answer[27:]}}{% else %}{{answer_number|string}}{%endif %}'
# Single-quoted on purpose: the \n stays literal for YAML and is interpreted
# inside the Jinja string literal.
doc_to_text: '{% if answer is not none %}{{question+"\nErantzuna urratsez urrats:"}}{% else %}{{"Galdera: "+question+"\nErantzuna urratsez urrats:"}}{% endif %}'
output_type: generate_until
training_split: train
test_split: test
target_delimiter: " "
# Stop sequences and decoding options belong to generation_kwargs; at column 0
# they would be parsed as separate top-level keys.
generation_kwargs:
  until:
    - "\n\n"
    - "\n"
    - "Galdera:"
    - '</s>'
    - '<|im_end|>'
  do_sample: false
  temperature: 0.0
filter_list:
  - name: "get-answer"
    filter:
      # Captures the number in "Erantzuna <number> da", with an optional
      # $ or % on either side.
      - function: "regex"
        regex_pattern: "Erantzuna [$%]? ?(-?[0-9]+([ .,][0-9.,]+)?) ?[$%]? da"
      - function: "take_first"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - " "
metadata:
  version: 1.0
# Basque MGSM with a direct-answer prompt ("Erantzuna:"). Two filter pipelines
# are defined; each result is scored with exact_match against the number.
task: mgsm_direct_eu
dataset_path: HiTZ/MGSM-eu
dataset_name: null
doc_to_target: '{{answer_number|string}}'
# Single-quoted on purpose: the \n stays literal for YAML and is interpreted
# inside the Jinja string literal.
doc_to_text: '{% if answer is not none %}{{question+"\nErantzuna:"}}{% else %}{{"Galdera: "+question+"\nErantzuna:"}}{% endif %}'
output_type: generate_until
training_split: train
test_split: test
target_delimiter: " "
# Stop sequences and decoding options belong to generation_kwargs; at column 0
# they would be parsed as separate top-level keys.
generation_kwargs:
  until:
    - "\n\n"
    - "\n"
    - "Galdera:"
    - '</s>'
    - '<|im_end|>'
  do_sample: false
  temperature: 0.0
# Each pipeline is its own list item; without the nesting, `name`, `filter`
# and `function` would be duplicate keys.
filter_list:
  - name: remove_whitespace
    filter:
      - function: remove_whitespace
      - function: take_first
  - name: flexible-extract
    filter:
      # group_select: -1 presumably keeps the last regex match - confirm
      # against the harness's regex filter.
      - function: regex
        group_select: -1
        regex_pattern: '(-?[0-9]+([ .,][0-9.,]+)?)'
      - function: take_first
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - " "
metadata:
  version: 1.0
# Basque PIQA: two-way multiple choice over `sol1` / `sol2` for a given
# `goal`, evaluated on the validation split only.
task: piqa_eu
dataset_path: HiTZ/PIQA-eu
dataset_name: null
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: null
doc_to_text: "Galdera: {{goal}}\nErantzuna:"
doc_to_target: label
doc_to_choice: "{{[sol1, sol2]}}"
should_decontaminate: true
doc_to_decontamination_query: goal
# Each metric carries its own aggregation settings, so they are indented under
# the metric entry; at column 0 the repeated keys would collide.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
from functools import partial
# ~~~~~~~~~~~ XCOPA ~~~~~~~~~~~ #
# Basque connectors appended to the premise, keyed by the value of the
# document's "question" field.
xcopa_connectors = {"cause": " Izan ere,", "effect": " Beraz,"}


def xcopa_doc_to_text(doc):
    """Build the XCOPA prompt for one document.

    Args:
        doc: Mapping with a "premise" string and a "question" key that must be
            one of the keys of ``xcopa_connectors``.

    Returns:
        The premise with surrounding whitespace removed, followed by the
        connector for the question type.

    Raises:
        KeyError: If ``doc["question"]`` is not a known connector key.
    """
    conn = xcopa_connectors[doc["question"]]
    return doc["premise"].strip() + conn
def xcopa_doc_to_choice(doc):
    """Return the two answer options with their first character lowercased.

    Args:
        doc: Mapping with "choice1" and "choice2" strings.

    Returns:
        A two-element list ``[choice1, choice2]``; only the first character of
        each is lowercased, the rest is unchanged.
    """

    def convert_choice(choice):
        # Slice instead of indexing so an empty choice yields "" rather than
        # raising IndexError.
        return choice[:1].lower() + choice[1:]

    return [convert_choice(doc["choice1"]), convert_choice(doc["choice2"])]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
# Basque WNLI: given sentence1, decide whether sentence2 is true ("Egia") or
# false ("Gezurra"); evaluated on the validation split only.
task: wnli_eu
dataset_path: HiTZ/wnli-eu
dataset_name: null
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: null
doc_to_text: "{{sentence1}}\nGaldera: {{sentence2}} Egia edo Gezurra?\nErantzuna:"
# `label` is presumably an index into doc_to_choice (0 -> Gezurra, 1 -> Egia);
# confirm against the dataset.
doc_to_target: label
doc_to_choice: ["Gezurra", "Egia"]
metric_list:
  - metric: acc
# `version` must be indented under `metadata`; at column 0 `metadata` would be
# null and `version` a separate top-level key.
metadata:
  version: 1.0
# Basque XCOPA: two-way multiple choice. The prompt and the choices are built
# by Python helpers referenced through the `!function` tag.
task: xcopa_eu
dataset_path: HiTZ/XCOPA-eu
dataset_name: null
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
# `!function` is a custom tag resolved by the consuming loader; presumably it
# imports the named function from the sibling utils module - a generic safe
# loader will not understand it.
doc_to_text: !function utils.xcopa_doc_to_text
doc_to_target: label
doc_to_choice: !function utils.xcopa_doc_to_choice
metric_list:
  - metric: acc
# `version` must be indented under `metadata`; at column 0 `metadata` would be
# null and `version` a separate top-level key.
metadata:
  version: 1.0
......@@ -43,11 +43,15 @@ Homepage: `https://github.com/hitz-zentroa/latxa`
}
```
### Groups and Tasks
### Groups, Tags, and Tasks
#### Groups
* `basque-glue`: First version of the implementation
None.
#### Tags
* `basque-glue`: First version of the implementation. Calls all subtasks, but does not average.
#### Tasks
......
group: basque-glue
tag: basque-glue
task: bec2016eu
dataset_path: orai-nlp/basqueGLUE
dataset_name: bec
......
group: basque-glue
tag: basque-glue
task: bhtc_v2
dataset_path: orai-nlp/basqueGLUE
dataset_name: bhtc
......
group: basque-glue
tag: basque-glue
task: epec_koref_bin
dataset_path: orai-nlp/basqueGLUE
dataset_name: coref
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment