Commit 948f120f authored by Baber's avatar Baber
Browse files

Merge branch 'main' into autobatchtest

# Conflicts:
#	lm_eval/models/huggingface.py
parents a5b1c7a8 bd80a6c0
# File generated by `create-yamls.py`
# Catalan -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_ca-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Catalan sentence: {{sentence_cat_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# German -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_de-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "German sentence: {{sentence_deu_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# English -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_en-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "English sentence: {{sentence_eng_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# Spanish -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_es-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Spanish sentence: {{sentence_spa_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# Basque -> Catalan translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-ca
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nCatalan sentence:"
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# Basque -> German translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-de
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nGerman sentence:"
doc_to_target: '{{sentence_deu_Latn}}'
# File generated by `create-yamls.py`
# Basque -> English translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-en
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nEnglish sentence:"
doc_to_target: '{{sentence_eng_Latn}}'
# File generated by `create-yamls.py`
# Basque -> Spanish translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-es
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nSpanish sentence:"
doc_to_target: '{{sentence_spa_Latn}}'
# File generated by `create-yamls.py`
# Basque -> French translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-fr
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nFrench sentence:"
doc_to_target: '{{sentence_fra_Latn}}'
# File generated by `create-yamls.py`
# Basque -> Galician translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-gl
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nGalician sentence:"
doc_to_target: '{{sentence_glg_Latn}}'
# File generated by `create-yamls.py`
# Basque -> Italian translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_eu-it
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nItalian sentence:"
doc_to_target: '{{sentence_ita_Latn}}'
# File generated by `create-yamls.py`
# Basque -> Portuguese translation prompt; shared settings come from the
# include.
include: _flores_common_yaml
task: flores_eu-pt
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Basque sentence: {{sentence_eus_Latn}}\nPortuguese sentence:"
doc_to_target: '{{sentence_por_Latn}}'
# Group that bundles every Basque FLORES translation direction listed below.
group: flores_eu
task:
  - flores_es-eu
  - flores_eu-es
  - flores_en-eu
  - flores_eu-en
  - flores_eu-pt
  - flores_pt-eu
  - flores_eu-it
  - flores_it-eu
  - flores_eu-fr
  - flores_fr-eu
  - flores_eu-ca
  - flores_ca-eu
  - flores_eu-gl
  - flores_gl-eu
  - flores_eu-de
  - flores_de-eu
# Group-level score: unweighted mean of the member tasks' BLEU
# (`weight_by_size: false`).
aggregate_metric_list:
  - metric: bleu
    aggregation: mean
    weight_by_size: false
metadata:
  version: 1.0
# File generated by `create-yamls.py`
# French -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_fr-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "French sentence: {{sentence_fra_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# Galician -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_gl-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Galician sentence: {{sentence_glg_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# Italian -> Basque translation prompt; shared settings come from the include.
include: _flores_common_yaml
task: flores_it-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Italian sentence: {{sentence_ita_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# File generated by `create-yamls.py`
# Portuguese -> Basque translation prompt; shared settings come from the
# include.
include: _flores_common_yaml
task: flores_pt-eu
# Double-quoted so "\n" is a real newline between the source sentence and the
# target cue (a plain line break inside quotes is folded to a space by YAML).
doc_to_text: "Portuguese sentence: {{sentence_por_Latn}}\nBasque sentence:"
doc_to_target: '{{sentence_eus_Latn}}'
# Basque MGSM, step-by-step ("urratsez urrats") prompting variant.
task: mgsm_native_cot_eu
dataset_path: HiTZ/MGSM-eu
dataset_name: null
output_type: generate_until
training_split: train
test_split: test
# Prompt: rows whose `answer` is not none use the question as-is; the others
# get a "Galdera: " prefix. Both end with the step-by-step cue.
doc_to_text: '{% if answer is not none %}{{question+"\nErantzuna urratsez urrats:"}}{% else %}{{"Galdera: "+question+"\nErantzuna urratsez urrats:"}}{% endif %}'
# Target: `answer` minus its first 27 characters when present, otherwise the
# bare number as a string.
# NOTE(review): the 27-character offset presumably skips a fixed prefix in the
# dataset's worked answers - confirm against HiTZ/MGSM-eu.
doc_to_target: '{% if answer is not none %}{{answer[27:]}}{% else %}{{answer_number|string}}{%endif %}'
target_delimiter: ' '
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "\n\n"
    - "\n"
    - 'Galdera:'
    - '</s>'
    - '<|im_end|>'
  do_sample: false
  temperature: 0.0
filter_list:
  - name: get-answer
    filter:
      # Capture the number inside "Erantzuna <number> da", allowing an
      # optional $ or % on either side of it.
      - function: regex
        regex_pattern: 'Erantzuna [$%]? ?(-?[0-9]+([ .,][0-9.,]+)?) ?[$%]? da'
      - function: take_first
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ' '
metadata:
  version: 1.0
# Basque MGSM, direct-answer variant: the prompt ends with "Erantzuna:" and the
# target is the bare number.
task: mgsm_direct_eu
dataset_path: HiTZ/MGSM-eu
dataset_name: null
output_type: generate_until
training_split: train
test_split: test
# Prompt: rows whose `answer` is not none use the question as-is; the others
# get a "Galdera: " prefix.
doc_to_text: '{% if answer is not none %}{{question+"\nErantzuna:"}}{% else %}{{"Galdera: "+question+"\nErantzuna:"}}{% endif %}'
doc_to_target: '{{answer_number|string}}'
target_delimiter: ' '
generation_kwargs:
  # Stop sequences for generation.
  until:
    - "\n\n"
    - "\n"
    - 'Galdera:'
    - '</s>'
    - '<|im_end|>'
  do_sample: false
  temperature: 0.0
filter_list:
  # Pipeline 1: strip whitespace from the response, keep the first result.
  - name: remove_whitespace
    filter:
      - function: remove_whitespace
      - function: take_first
  # Pipeline 2: regex-match numbers in the response; `group_select: -1`
  # selects the last match.
  - name: flexible-extract
    filter:
      - function: regex
        group_select: -1
        regex_pattern: '(-?[0-9]+([ .,][0-9.,]+)?)'
      - function: take_first
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
    regexes_to_ignore:
      - ' '
metadata:
  version: 1.0
from functools import partial
# ~~~~~~~~~~~ XCOPA ~~~~~~~~~~~ #
# Basque connective appended after the premise, keyed by the doc's question
# type. Each value starts with a space so it can be concatenated directly.
xcopa_connectors = {"cause": " Izan ere,", "effect": " Beraz,"}


def xcopa_doc_to_text(doc):
    """Build the XCOPA prompt: the stripped premise followed by a connective.

    Args:
        doc: Mapping with a "premise" string and a "question" key whose value
            is one of the keys of ``xcopa_connectors``.

    Returns:
        The premise with surrounding whitespace removed, immediately followed
        by the connective for the question type.

    Raises:
        KeyError: If ``doc["question"]`` is not a key of ``xcopa_connectors``.
    """
    connector = xcopa_connectors[doc["question"]]
    # The connective is already a str; no f-string wrapper is needed.
    return doc["premise"].strip() + connector
def xcopa_doc_to_choice(doc):
    """Return the two answer choices with their first character lower-cased.

    Args:
        doc: Mapping with string values under "choice1" and "choice2".

    Returns:
        A two-element list ``[choice1, choice2]`` where only the first
        character of each string has been lower-cased.
    """

    def convert_choice(choice):
        # Slice instead of indexing so an empty choice yields "" rather than
        # raising IndexError.
        return choice[:1].lower() + choice[1:]

    return [convert_choice(doc["choice1"]), convert_choice(doc["choice2"])]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment