Unverified commit cb069004, authored by zxcvuser, committed by GitHub
Browse files

Add new benchmark: Catalan bench (#2154)



* Add catalan_bench

* Add flores_ca.yaml

* Update some task groupings and the README

* Fix create_yamls_flores_ca.py

---------
Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
parent c887796d
# File generated by `create-yamls.py`
# FLORES translation task: Catalan -> French.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_ca-fr
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Catalan sentence: {{sentence_cat_Latn}}
  French sentence:
doc_to_target: '{{sentence_fra_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Catalan -> Galician.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_ca-gl
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Catalan sentence: {{sentence_cat_Latn}}
  Galician sentence:
doc_to_target: '{{sentence_glg_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Catalan -> Italian.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_ca-it
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Catalan sentence: {{sentence_cat_Latn}}
  Italian sentence:
doc_to_target: '{{sentence_ita_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Catalan -> Portuguese.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_ca-pt
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Catalan sentence: {{sentence_cat_Latn}}
  Portuguese sentence:
doc_to_target: '{{sentence_por_Latn}}'
# Group definition bundling every FLORES translation direction that has
# Catalan as either the source or the target language.
group: flores_ca
task:
  - flores_es-ca
  - flores_ca-es
  - flores_en-ca
  - flores_ca-en
  - flores_eu-ca
  - flores_ca-eu
  - flores_pt-ca
  - flores_ca-pt
  - flores_it-ca
  - flores_ca-it
  - flores_fr-ca
  - flores_ca-fr
  - flores_ca-gl
  - flores_gl-ca
  - flores_ca-de
  - flores_de-ca
# `aggregation` and `weight_by_size` must be nested under the `bleu` entry;
# at column 0 they would parse as unrelated top-level keys.
aggregate_metric_list:
  - metric: bleu
    aggregation: mean
    # Plain (unweighted) mean over the subtasks listed above.
    weight_by_size: false
metadata:
  version: 1.0
# File generated by `create-yamls.py`
# FLORES translation task: German -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_de-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  German sentence: {{sentence_deu_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: English -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_en-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  English sentence: {{sentence_eng_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Spanish -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_es-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Spanish sentence: {{sentence_spa_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Basque -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_eu-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Basque sentence: {{sentence_eus_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: French -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_fr-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  French sentence: {{sentence_fra_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Galician -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_gl-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Galician sentence: {{sentence_glg_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Italian -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_it-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Italian sentence: {{sentence_ita_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# File generated by `create-yamls.py`
# FLORES translation task: Portuguese -> Catalan.
# Inherits the settings declared in `_flores_common_yaml` (not shown here).
include: _flores_common_yaml
task: flores_pt-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-language cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Portuguese sentence: {{sentence_por_Latn}}
  Catalan sentence:
doc_to_target: '{{sentence_cat_Latn}}'
# Catalan MGSM, direct-answer variant: the model generates text after the
# "Resposta: " cue and the result is scored by exact match against the
# stringified `answer_number` field.
task: mgsm_direct_ca
dataset_path: projecte-aina/mgsm_ca
doc_to_target: '{{answer_number|string}}'
# Docs whose `answer` is not None use the question verbatim; all others get a
# "Pregunta: " prefix. Both variants end with the "Resposta: " cue.
# Single quotes keep `\n` as a literal backslash-n for the template engine.
doc_to_text: '{% if answer != None %}{{question + "\nResposta: "}}{% else %}{{"Pregunta: " + question + "\nResposta: "}}{% endif %}'
output_type: generate_until
training_split: train
test_split: test
# Empty delimiter: the prompt already ends with "Resposta: ".
target_delimiter: ""
# `until` must be nested under `generation_kwargs`, and the stop strings
# under `until`; at column 0 `until` would parse as a top-level key.
generation_kwargs:
  until:
    - "\n\n"
    - "\n"
# A single named filter pipeline with two steps applied in order.
filter_list:
  - name: remove_whitespace
    filter:
      - function: remove_whitespace
      - function: take_first
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  version: 1.0
# Catalan OpenBookQA as a multiple-choice task, reporting `acc` and
# `acc_norm`.
task: openbookqa_ca
dataset_path: projecte-aina/openbookqa_ca
output_type: multiple_choice
training_split: null
validation_split: validation
test_split: test
doc_to_text: question_stem
# Gold index = position of `answerKey` (leading whitespace stripped) inside
# the `choices.label` list.
doc_to_target: "{{choices.label.index(answerKey.lstrip())}}"
doc_to_choice: "{{choices.text}}"
should_decontaminate: true
doc_to_decontamination_query: question_stem
# Each metric's `aggregation` / `higher_is_better` must be nested under its
# own list entry; at column 0 they would parse as duplicate top-level keys.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Parafraseja paraphrase detection, framed as a two-way multiple choice
# between a "No" and a "Sí" continuation of the first sentence.
task: parafraseja
dataset_path: projecte-aina/Parafraseja
output_type: multiple_choice
dataset_name: null
test_split: test
training_split: train
validation_split: validation
# Choice 0 joins the sentences with ", veritat? No, ", choice 1 with
# ", veritat? Sí, "; `label` indexes into this list.
doc_to_choice: '{{[sentence1+", veritat? No, "+sentence2, sentence1+", veritat? Sí, "+sentence2]}}'
# Project helper defined in utils (not visible here); presumably it
# normalizes the sentence pair before templating. TODO confirm.
process_docs: !function utils.process_docs_paraphrases
# The prompt is empty: each choice already contains the complete text.
doc_to_text: ''
doc_to_target: label
# NOTE(review): `paws_ca` builds its choices the same way but also sets
# `target_delimiter: ''`; this task leaves it unset. Confirm the difference
# is intentional before changing it, since it affects scores.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# PAWS-ca paraphrase detection, framed as a two-way multiple choice between
# a "No" and a "Sí" continuation of the first sentence.
task: paws_ca
dataset_path: projecte-aina/PAWS-ca
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
# Project helper defined in utils (not visible here); presumably it
# normalizes the sentence pair before templating. TODO confirm.
process_docs: !function utils.process_docs_paraphrases
# The prompt is empty: each choice already contains the complete text.
doc_to_text: ''
doc_to_target: label
# Choice 0 joins the sentences with ", veritat? No, ", choice 1 with
# ", veritat? Sí, "; `label` indexes into this list.
doc_to_choice: '{{[sentence1+", veritat? No, "+sentence2, sentence1+", veritat? Sí, "+sentence2]}}'
# Empty delimiter, so nothing is inserted between the (empty) prompt and
# the choice text.
target_delimiter: ''
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Shared settings for the Catalan <-> Valencian phrase translation tasks that
# include this file; each task supplies its own prompt and target fields.
tag: phrases_va
dataset_path: gplsi/CA-VA_alignment_test
output_type: generate_until
training_split: null
validation_split: null
test_split: test
# NOTE(review): no train or validation split is configured, so the few-shot
# examples come from the same split that is evaluated.
fewshot_split: test
num_fewshot: 5
target_delimiter: ' '
# `until` must be nested under `generation_kwargs`; generation stops at the
# first newline.
generation_kwargs:
  until:
    - "\n"
# Each metric's `aggregation` / `higher_is_better` must be nested under its
# own list entry. TER is the only metric here where lower is better.
metric_list:
  - metric: bleu
    aggregation: bleu
    higher_is_better: true
  - metric: ter
    aggregation: ter
    higher_is_better: false
  - metric: chrf
    aggregation: chrf
    higher_is_better: true
metadata:
  version: 1.0
# File generated by `create-yamls.py`
# Phrase translation task: Catalan -> Valencian.
# Inherits the settings declared in `_phrases_va_common.yaml`.
include: _phrases_va_common.yaml
task: phrases_ca-va
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-variety cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Oració en català: {{ca}}
  Oració en valencià:
doc_to_target: '{{va}}'
# File generated by `create-yamls.py`
# Phrase translation task: Valencian -> Catalan.
# Inherits the settings declared in `_phrases_va_common.yaml`.
include: _phrases_va_common.yaml
task: phrases_va-ca
# A literal block scalar (`|-`) keeps the line break between the source
# sentence and the target-variety cue; a quoted scalar would fold a single
# line break into a space. `-` strips the trailing newline so the prompt ends
# right after the cue.
doc_to_text: |-
  Oració en valencià: {{va}}
  Oració en català:
doc_to_target: '{{ca}}'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment