Merge branch 'main' into feature/eval_from_config

601be343 · Baber · d0884a96 · 68c3a811 · 601be343 · 601be343
Commit 601be343 authored Jun 23, 2025 by Baber
20 changed files
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_orm.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_orm.yaml
+# Generated by utils.py
+# Oromo (`orm`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: orm
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Oromo language.\nAnalyze the premise and hypothesis given in Oromo, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sna.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sna.yaml
+# Generated by utils.py
+# chiShona (`sna`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: sna
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the chiShona language.\nAnalyze the premise and hypothesis given in chiShona,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sot.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sot.yaml
+# Generated by utils.py
+# Sesotho (`sot`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: sot
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Sesotho language.\nAnalyze the premise and hypothesis given in Sesotho, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_swa.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_swa.yaml
+# Generated by utils.py
+# Swahili (`swa`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: swa
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Swahili language.\nAnalyze the premise and hypothesis given in Swahili, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_twi.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_twi.yaml
+# Generated by utils.py
+# Twi (`twi`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: twi
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Twi language.\nAnalyze the premise and hypothesis given in Twi, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_wol.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_wol.yaml
+# Generated by utils.py
+# Wolof (`wol`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: wol
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Wolof language.\nAnalyze the premise and hypothesis given in Wolof, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_xho.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_xho.yaml
+# Generated by utils.py
+# isiXhosa (`xho`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: xho
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the isiXhosa language.\nAnalyze the premise and hypothesis given in isiXhosa,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yaml
+# Shared settings for the prompt_4 AfriXNLI tasks. The per-language
+# afrixnli_<lang>.yaml files in this directory pull this file in via `include`
+# and add their own dataset_name, doc_to_text and task.
+tag:
+  - afrixnli_tasks
+  - afrixnli_tasks_prompt_4
+dataset_path: masakhane/afrixnli
+# Left null here; each including file supplies its own dataset_name.
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: !function utils.doc_to_target
+# Same order as the 0/1/2 label mapping in utils.doc_to_target.
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yor.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yor.yaml
+# Generated by utils.py
+# Yoruba (`yor`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: yor
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Yoruba language.\nAnalyze the premise and hypothesis given in Yoruba, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_zul.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_zul.yaml
+# Generated by utils.py
+# Zulu (`zul`) variant of the prompt_4 AfriXNLI task. Only the dataset
+# config, the prompt and the task name are set here; other settings are
+# expected to come from the included afrixnli_yaml.
+dataset_name: zul
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The spaces
+# around the "\n" escapes are part of the string value.
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Zulu language.\nAnalyze the premise and hypothesis given in Zulu, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_4
--- a/lm_eval/tasks/afrixnli/direct/prompt_4/utils.py
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/utils.py
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):
+    """Build the English NLI prompt for a single document.
+
+    Args:
+        doc: Mapping that provides ``"premise"`` and ``"hypothesis"`` entries.
+
+    Returns:
+        The prompt string with both fields substituted into the template.
+    """
+    # The four leading spaces on every line after the first are part of the
+    # emitted prompt text, not source indentation.
+    template_lines = (
+        "Please identify whether the premise entails or contradicts the hypothesis in the following premise",
+        "    and hypothesis. The answer should be exact entailment, contradiction, or neutral.",
+        "",
+        "    Premise: {premise}",
+        "    Hypothesis: {hypothesis}",
+        "",
+        "    Is it entailment, contradiction, or neutral?",
+    )
+    template = "\n".join(template_lines)
+    return template.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
+
+
+def doc_to_target(doc):
+    """Return the NLI class name for the integer stored in ``doc["label"]``.
+
+    Args:
+        doc: Mapping that provides a ``"label"`` entry equal to 0, 1 or 2.
+
+    Returns:
+        ``"entailment"`` for 0, ``"neutral"`` for 1, ``"contradiction"`` for 2.
+
+    Raises:
+        KeyError: If ``"label"`` is missing or is not one of 0, 1, 2.
+    """
+    label_names = {
+        0: "entailment",
+        1: "neutral",
+        2: "contradiction",
+    }
+    label_id = doc["label"]
+    return label_names[label_id]
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_amh.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_amh.yaml
+# Generated by utils.py
+# `amh` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: amh
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_eng.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_eng.yaml
+# Generated by utils.py
+# `eng` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: eng
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ewe.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ewe.yaml
+# Generated by utils.py
+# `ewe` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: ewe
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_fra.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_fra.yaml
+# Generated by utils.py
+# `fra` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: fra
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_hau.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_hau.yaml
+# Generated by utils.py
+# `hau` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: hau
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ibo.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ibo.yaml
+# Generated by utils.py
+# `ibo` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: ibo
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_kin.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_kin.yaml
+# Generated by utils.py
+# `kin` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: kin
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lin.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lin.yaml
+# Generated by utils.py
+# `lin` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: lin
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_5
--- a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lug.yaml
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lug.yaml
+# Generated by utils.py
+# `lug` variant of the prompt_5 AfriXNLI task. Only the dataset config, the
+# prompt and the task name are set here; other settings are expected to come
+# from the included afrixnli_yaml.
+dataset_name: lug
+# Prompt template with {{premise}} / {{hypothesis}} placeholders. The space
+# before each "\n" escape is part of the string value.
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_5