Merge branch 'main' into mathvista

# Conflicts:
#	lm_eval/models/hf_vlms.py

Merge branch 'main' into mathvista
# Conflicts:
#	lm_eval/models/hf_vlms.py
25869601 · Baber · 56f40c53 · c1d8795d · 25869601 · 25869601
Commit 25869601 authored Oct 19, 2024 by Baber
20 changed files
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_de-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_de-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_de-gl
+doc_to_text: 'German sentence: {{sentence_deu_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_en-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_en-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_en-gl
+doc_to_text: 'English sentence: {{sentence_eng_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_es-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_es-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_es-gl
+doc_to_text: 'Spanish sentence: {{sentence_spa_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_eu-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_eu-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_eu-gl
+doc_to_text: 'Basque sentence: {{sentence_eus_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_fr-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_fr-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_fr-gl
+doc_to_text: 'French sentence: {{sentence_fra_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-ca.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-ca.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-ca
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  Catalan sentence:'
+doc_to_target: '{{sentence_cat_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-de.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-de.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-de
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  German sentence:'
+doc_to_target: '{{sentence_deu_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-en.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-en.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-en
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  English sentence:'
+doc_to_target: '{{sentence_eng_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-es.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-es.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-es
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  Spanish sentence:'
+doc_to_target: '{{sentence_spa_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-eu.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-eu.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-eu
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  Basque sentence:'
+doc_to_target: '{{sentence_eus_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-fr.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-fr.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-fr
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  French sentence:'
+doc_to_target: '{{sentence_fra_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-it.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-it.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-it
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  Italian sentence:'
+doc_to_target: '{{sentence_ita_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl-pt.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl-pt.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_gl-pt
+doc_to_text: 'Galician sentence: {{sentence_glg_Latn}}
+
+  Portuguese sentence:'
+doc_to_target: '{{sentence_por_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_gl.yaml
+group: flores_gl
+task:
+  - flores_es-gl
+  - flores_gl-es
+  - flores_en-gl
+  - flores_gl-en
+  - flores_eu-gl
+  - flores_gl-eu
+  - flores_pt-gl
+  - flores_gl-pt
+  - flores_it-gl
+  - flores_gl-it
+  - flores_fr-gl
+  - flores_gl-fr
+  - flores_ca-gl
+  - flores_gl-ca
+  - flores_gl-de
+  - flores_de-gl
+aggregate_metric_list:
+  - metric: bleu
+    aggregation: mean
+    weight_by_size: false
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_it-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_it-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_it-gl
+doc_to_text: 'Italian sentence: {{sentence_ita_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/flores_gl/flores_pt-gl.yaml
+++ b/lm_eval/tasks/galician_bench/flores_gl/flores_pt-gl.yaml
+# File generated by `create-yamls.py`
+include: _flores_common_yaml
+task: flores_pt-gl
+doc_to_text: 'Portuguese sentence: {{sentence_por_Latn}}
+
+  Galician sentence:'
+doc_to_target: '{{sentence_glg_Latn}}'
--- a/lm_eval/tasks/galician_bench/galcola.yaml
+++ b/lm_eval/tasks/galician_bench/galcola.yaml
+task: galcola
+dataset_path: proxectonos/galcola
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+test_split: test
+doc_to_text: "{{sentence}}\nPregunta: Ten sentido esta frase?\nResposta:"
+doc_to_target: label
+doc_to_choice: ["non", "si"]
+should_decontaminate: true
+doc_to_decontamination_query: sentence
+metric_list:
+  - metric: mcc
+  - metric: acc
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/galician_bench/galician_bench.yaml
+++ b/lm_eval/tasks/galician_bench/galician_bench.yaml
+group: galician_bench
+task:
+  - belebele_glg_Latn
+  - flores_gl
+  - galcola
+  - summarization_gl
+  - parafrases_gl
+  - paws_gl
+  - openbookqa_gl
+  - mgsm_direct_gl
+  - truthfulqa_gl
+  - xnli_gl
+  - xstorycloze_gl
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/galician_bench/mgsm_direct_gl.yaml
+++ b/lm_eval/tasks/galician_bench/mgsm_direct_gl.yaml
+task: mgsm_direct_gl
+dataset_path: proxectonos/mgsm_gl
+doc_to_target: '{{answer_number|string}}'
+doc_to_text: '{% if answer != None %}{{question + "\nResposta: "}}{% else %}{{"Pregunta: " + question + "\nResposta: "}}{% endif %}'
+output_type: generate_until
+training_split: train
+test_split: test
+target_delimiter: ""
+generation_kwargs:
+  until:
+    - "\n\n"
+    - "\n"
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/galician_bench/openbookqa_gl.yaml
+++ b/lm_eval/tasks/galician_bench/openbookqa_gl.yaml
+# Task configuration taken directly from EleutherAI's implementation as of March 22, 2024
+task: openbookqa_gl
+dataset_path: proxectonos/openbookqa_gl
+output_type: multiple_choice
+training_split: null
+validation_split: validation
+test_split: test
+doc_to_text: question_stem
+doc_to_target: "{{choices.label.index(answerKey.lstrip())}}"
+doc_to_choice: "{{choices.text}}"
+should_decontaminate: true
+doc_to_decontamination_query: question_stem
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+  - metric: acc_norm
+    aggregation: mean
+    higher_is_better: true
+metadata:
+  version: 1.0