diff --git a/lm_eval/filters/extraction.py b/lm_eval/filters/extraction.py
index 9c8d796b6099d89fb6b6e5b2e17444cfa66f1b06..22ca883a9d00b2156c6aedc5df7448879a03da65 100644
--- a/lm_eval/filters/extraction.py
+++ b/lm_eval/filters/extraction.py
@@ -54,6 +54,51 @@ class RegexFilter(Filter):
             return filtered
 
         filtered_resps = list(map(lambda x: filter_set(x), resps))
+        return filtered_resps
+
+
+@register_filter("regex_pos")
+class POSFilter(Filter):
+    """Collect the tag half of every "('token', 'TAG')" pair found in each response."""
+
+    def __init__(
+        self,
+        regex_pattern: str = r"\['(.*?)'\]",
+        group_select=0,
+        fallback=None,
+    ) -> None:
+        """
+        `regex_pattern` is compiled and stored next to `group_select`; `apply` reads neither.
+        `fallback` is what `apply` returns for a response with no tags (default `["invalid"]`).
+        """
+        if fallback is None:
+            fallback = ["invalid"]
+        self.regex_pattern = regex_pattern
+        self.regex = re.compile(regex_pattern)
+        self.group_select = group_select
+        self.fallback = fallback
+
+    def apply(self, resps, docs):
+        """Return, for each response set in `resps`, one list of tags per response.
+
+        A response with no tags yields `self.fallback`; `docs` is not read.
+        """
+        # Captures both halves of every "('token', 'TAG')" pair in a string.
+        pair_pattern = r"\('([^']*)', '([^']*)'\)"
+
+        def extract_pos_tags(result):
+            # Strings are parsed into (token, tag) pairs; anything else is
+            # assumed to already be an iterable of such pairs.
+            if isinstance(result, str):
+                result = re.findall(pair_pattern, result)
+            pos_tags = [pos for _, pos in result]
+            return pos_tags if pos_tags else self.fallback
+
+        def filter_set(inst):
+            return [extract_pos_tags(resp) for resp in inst]
+
+        # Build a real list (as RegexFilter.apply does) instead of a one-shot map iterator.
+        filtered_resps = list(map(filter_set, resps))
 
         return filtered_resps
 
diff --git a/lm_eval/filters/transformation.py b/lm_eval/filters/transformation.py
index 1a3592b6dd4811dcef39ff090dfa42e926613b5c..722c67403c8adbc499283a611df17eb1743307b8 100644
--- a/lm_eval/filters/transformation.py
+++ b/lm_eval/filters/transformation.py
@@ -1,3 +1,5 @@
+import re
+
 from lm_eval.api.filter import Filter
 from lm_eval.api.registry import register_filter
 
@@ -54,3 +56,67 @@ class MapFilter(Filter):
             return [self.mapping_dict.get(resp, self.default_value) for resp in inst]
 
         return [filter_set(resp) for resp in resps]
+
+
+@register_filter("format_span")
+class SPANFilter(Filter):
+    """Normalize free-text NER responses into "label: entity $ label: entity" strings."""
+
+    def __init__(self) -> None:
+        pass
+
+    def apply(self, resps, docs):
+        """Return one normalized entity string per response; `docs` is not read."""
+
+        def format_ner_text(text):
+            # Replacement is plain substring matching in dict order, so "persons" must
+            # precede "person" (else it becomes "PERs" and is never recognized as a label).
+            # NOTE(review): entity values are rewritten too (e.g. "cooper" -> "cooPER").
+            label_dict = {
+                "persons": "PER",
+                "person": "PER",
+                "location": "LOC",
+                "organization": "ORG",
+                "counties": "LOC",
+                "places": "LOC",
+                "people": "PER",
+                "company": "ORG",
+                "country": "LOC",
+                "continent": "LOC",
+                "time": "DATE",
+                "date": "DATE",
+                "per": "PER",
+                "loc": "LOC",
+                "org": "ORG",
+            }
+            text = text.lower()
+            for key, value in label_dict.items():
+                text = text.replace(key, value)
+
+            # Collapse "$$" separators to "$" and drop any trailing "$".
+            return text.replace("$$", "$").rstrip("$")
+
+        def format_named_entities(text):
+            """
+            Extract "LABEL: a, b" groups and flatten them into 'label: a $ label: b'.
+            Comma-separated entities are split apart; entities equal to 'none' are dropped.
+            """
+            # A label, a colon, then everything up to the next "$" separator.
+            pattern = r"\b(PER|LOC|ORG|DATE):\s*([^$]+)"
+            # Newlines also separate entries, so fold them into "$".
+            text = text.replace("\n", "$").strip()
+            formatted_entities = []
+            for label, values in re.findall(pattern, text):
+                # A label may carry several comma-separated entities.
+                for value in values.split(","):
+                    entity = value.strip()
+                    # Exclude 'none' entities
+                    if entity.lower() != "none":
+                        formatted_entities.append(f"{label.lower()}: {entity}")
+
+            return " $ ".join(formatted_entities)
+
+        def filter_set(inst):
+            return [format_named_entities(format_ner_text(resp)) for resp in inst]
+
+        return [filter_set(resp) for resp in resps]
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2316a748bd6f72f0c234544c016ddfd6b33fd9ff
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/afrimgsm.yaml
@@ -0,0 +1,13 @@
+group: afrimgsm-irokobench
+task:
+  - afrimgsm_tasks_prompt_1  # declared as a tag in direct/prompt_1/afrimgsm_yaml
+  - afrimgsm_tasks_prompt_2
+  - afrimgsm_tasks_prompt_3
+  - afrimgsm_tasks_prompt_4
+  - afrimgsm_tasks_prompt_5
+aggregate_metric_list:
+  - metric: acc  # NOTE(review): direct/prompt_1/afrimgsm_yaml lists only exact_match in metric_list; confirm acc is intended
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml
deleted file mode 100644
index 04d0bdd67114f3c0887979fdce210f0fa94616e7..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: amh
-doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_amh
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml
deleted file mode 100644
index 5804270d4d0072764ca3d1190a75d7629bc251e9..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: eng
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_eng
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml
deleted file mode 100644
index 4eae6fc4c790968040080aee824c345bd786db44..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ewe
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_ewe
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml
deleted file mode 100644
index 16aeacf2c54706a18165bd1230ee812bb080ceb8..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: fra
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_fra
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml
deleted file mode 100644
index 3a6668e989af297b60b1aafd53a3cb44e3936a60..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: hau
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_hau
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml
deleted file mode 100644
index ab79986a5dec2af92711a675b3a4d79b31b044a9..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ibo
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_ibo
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml
deleted file mode 100644
index d4c9c75af0ccfc6d2b0b18138dec074e10b6047e..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: kin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_kin
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml
deleted file mode 100644
index 7136d7370cfd8f9e35b4ebc5e0615330b84edddc..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_lin
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml
deleted file mode 100644
index 03fc0c2884cf9d14cadcf583cce1e81c47938963..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lug
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_lug
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml
deleted file mode 100644
index 49d7e93390dc5c63ce83364ea1ec8ede77537ea8..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: orm
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_orm
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml
deleted file mode 100644
index a61de85a3ffbbd5c2f3e91d5f26eb63a6241d78c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sna
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_sna
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml
deleted file mode 100644
index 455c1adcc5b896ce2c2140c9f30e8fa1857e60a2..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sot
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_sot
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml
deleted file mode 100644
index 462ddfd378f8c02a872780a8013f0f74378551e0..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: swa
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_swa
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml
deleted file mode 100644
index 8c4673b7ba00668d5d3bdcacfd2e00f342362194..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: twi
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_twi
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml
deleted file mode 100644
index 08a8e030a4c0c0d444ac464b974d9886e434ff43..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: wol
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_wol
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml
deleted file mode 100644
index 2103d182f3ca1703c43e03279a6d1aa9bcc9532d..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: xho
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_xho
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml
deleted file mode 100644
index aa084c32a645cab532b002565f3c8a324708d6ba..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: yor
-doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_yor
diff --git a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml b/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml
deleted file mode 100644
index dcffb6944658282d620f7dbcec9d6513bcaf36c5..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: zul
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: direct_yaml
-task: afrimgsm_direct_zul
diff --git a/lm_eval/tasks/afrimgsm/direct/direct_yaml b/lm_eval/tasks/afrimgsm/direct/direct_yaml
deleted file mode 100644
index f9819fe6f8470e37e73b3f3bc7d6b5cf8147a290..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/direct/direct_yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# This file will be included in the generated language-specific task configs.
-# It doesn't have a yaml file extension as it is not meant to be imported directly
-# by the harness.
-tag:
-    - afrimgsm
-    - afrimgsm_direct
-dataset_path: masakhane/afrimgsm
-dataset_name: null  # Overridden by language-specific config.
-output_type: generate_until
-# training_split: train
-test_split: test
-target_delimiter: ""
-generation_kwargs:
-  until:
-    - "\n\n"
-    - "\n"
-  do_sample: false
-  temperature: 0.0
-filter_list:
-  - name: remove_whitespace
-    filter:
-      - function: remove_whitespace
-      - function: take_first
-  - filter:
-    - function: regex
-      group_select: -1
-      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
-    - function: take_first
-    name: flexible-extract
-metric_list:
-  - metric: exact_match
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
-metadata:
-  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_amh.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23007e3657c85b3b42ac5591180096c54740a240
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_yaml
+task: afrimgsm_amh_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_eng.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d5694225089b96dfeb06d331482bafa821cede8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_yaml
+task: afrimgsm_eng_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68980400de696f30ce325ff260b5f8cefd5d95dc
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_yaml
+task: afrimgsm_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_fra.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04d57dbd329cedd535476cd7980ac5a201d84847
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_yaml
+task: afrimgsm_fra_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_hau.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aef377d292006471eecc551eb414ced3c751eaa5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_yaml
+task: afrimgsm_hau_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbbb7ef859249191cdf52db9bdab6135319e7a60
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_yaml
+task: afrimgsm_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_kin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dcfc7160d7b3262ebb78b30a4fa070f748e1e619
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_yaml
+task: afrimgsm_kin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be6a24ceb1cdec606a11b31865f75a8ef5188b4a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_yaml
+task: afrimgsm_lin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lug.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4bffe69f252d7edda8e146a6f61b81dc0bd550c5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_yaml
+task: afrimgsm_lug_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_orm.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b916281cee9217fafbfdfe62a9c451ef918b36d2
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_yaml
+task: afrimgsm_orm_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sna.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1afa6bb3a5455f405d9735b933844a0974ec0899
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_yaml
+task: afrimgsm_sna_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sot.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6cd4cdcd7e4860f73fa0bbdffe16ce02b2d2234
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_yaml
+task: afrimgsm_sot_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_swa.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be6dba7151542a8b6ecb4e5cb1da18ab0d9121a3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_yaml
+task: afrimgsm_swa_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_twi.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a82235d7883d0fb0b23e92129ed373d2503a31e1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_yaml
+task: afrimgsm_twi_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_vai.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e04d28f0d33f1da1d6282431c8d4e1655a1f175b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_yaml
+task: afrimgsm_vai_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_wol.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34b773765f90db8695611663bf615744fd6cfaa8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_yaml
+task: afrimgsm_wol_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_xho.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d17530bd22284dfd33556f043fb2c18d0325a174
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_yaml
+task: afrimgsm_xho_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19d4f7d1fdf39118e2fc774097619b837d34122e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yaml
@@ -0,0 +1,35 @@
+tag:  # shared settings; the per-language files in prompt_1/ load them via "include: afrimgsm_yaml"
+    - afrimgsm_tasks
+    - afrimgsm_tasks_prompt_1
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'  # NOTE(review): the removed per-language configs sliced amh at [15:] and yor at [16:]; confirm 21 fits every language
+doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'  # "Question: " is prepended only when `answer` is none
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yor.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba89f9ace0bf2aff3e467e021408d6de790ddd0f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_yaml
+task: afrimgsm_yor_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_zul.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07b89135ac35c6c3f67df44ff3004e5aeab4197e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_1/afrimgsm_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_yaml
+task: afrimgsm_zul_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_amh.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac0812c1181c50fc51457b65f2cbeb8f64b5a78d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_yaml
+task: afrimgsm_amh_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_eng.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..940000376a025ef42c7352c2db416b2fe1a9b38f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_yaml
+task: afrimgsm_eng_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0cd4926120ab7de80c13ba9b13bd327c81866bb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_yaml
+task: afrimgsm_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_fra.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d709e9ccfdb0251096156eecea495576aca1c13
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_yaml
+task: afrimgsm_fra_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_hau.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e7d62ab615e7776f714f717d3b0f246782c8f21
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_yaml
+task: afrimgsm_hau_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fce1d51899bd307d460f312cbc60c8820295db6f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_yaml
+task: afrimgsm_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_kin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c7b65251577ccb7bfa1fe74ddbe06114247dae5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_yaml
+task: afrimgsm_kin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..71594885ab6bb615f95c4a0a4f17d24ba41bbd1d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_yaml
+task: afrimgsm_lin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lug.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf4633745e06b7af03cf890f7d2b9426179bbdbc
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_yaml
+task: afrimgsm_lug_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_orm.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3d9d96ed900fb7583da0b676616f069ec9bf5ee
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_yaml
+task: afrimgsm_orm_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sna.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e27f832a3b24b39d7515d657c01c54587fa3ac7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_yaml
+task: afrimgsm_sna_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sot.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ad77562229ec921e07ebcfe8a35ce9c7b072d99
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_yaml
+task: afrimgsm_sot_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_swa.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fea74a3d1bd1bad4946491457ef689d953b8f66a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_yaml
+task: afrimgsm_swa_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_twi.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..311639a12c103194dec5ccc6894bb175ac67cc26
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_yaml
+task: afrimgsm_twi_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_vai.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..655b23dec64404bb271b558726ef5f279096f4c0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_yaml
+task: afrimgsm_vai_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_wol.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..493551623d4071cfebbf59f0a179f487a0f46866
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_yaml
+task: afrimgsm_wol_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_xho.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c076be5a556970703a4f1581700d1c0b2a3217d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_yaml
+task: afrimgsm_xho_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2eaceade5342296cc43a1d343ee9af582792922e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yaml
@@ -0,0 +1,34 @@
+tag:
+    - afrimgsm_tasks
+    - afrimgsm_tasks_prompt_2
+dataset_path: masakhane/afrimgsm
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Give direct numerical answers for the question provided. \n\nQuestion: {{question}} \nAnswer: "
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yor.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6faf98bdd2ebe70aa1fbe7a0bc955eedce4c726a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_yaml
+task: afrimgsm_yor_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_zul.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b340608c1289ef7a6257cec74ea0956039d12a6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_2/afrimgsm_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_yaml
+task: afrimgsm_zul_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_amh.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb3ed8309c481abfa483761fefa9f8792f9a600a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_yaml
+task: afrimgsm_amh_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_eng.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..87efd748800c67b8703318535649280952b21208
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_yaml
+task: afrimgsm_eng_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4636069a5a6e9291fd071b1221c87927f895264f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_yaml
+task: afrimgsm_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_fra.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1faf85c8be5594d03e73c57fcea2fcfba1508a9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_yaml
+task: afrimgsm_fra_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_hau.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0cfcbe44e862238c16a5c12feadbedca447a3ee
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_yaml
+task: afrimgsm_hau_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc451b94feac4afc4ba0405ee9643e0ab6790080
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_yaml
+task: afrimgsm_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_kin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f01edaae271f79a4f3f9e6ce444e62af86a85032
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_yaml
+task: afrimgsm_kin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86ccf1e44be09e34aa5b597f1db171a1c6b7f9a5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_yaml
+task: afrimgsm_lin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lug.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff6b6de36aa85f43da7c26b0bccf33b92a4f5d9a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_yaml
+task: afrimgsm_lug_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_orm.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a5ff414f680b347e5d9b7f9504be35b968b4d7d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_yaml
+task: afrimgsm_orm_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sna.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4000b9dc41fa79e16f673db2f54fa31d77dded02
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_yaml
+task: afrimgsm_sna_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sot.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9941fce3105d32d2610c4e41b938360ac8b961c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_yaml
+task: afrimgsm_sot_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_swa.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abd8358024d6cc567f6044b7e0dfa69cd9d1a5eb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_yaml
+task: afrimgsm_swa_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_twi.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12652498abab497f610192e3dd6d377110e913d4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_yaml
+task: afrimgsm_twi_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_vai.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aae0b5f51a347f8b52841b0e0fc49197a8c8490b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_yaml
+task: afrimgsm_vai_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_wol.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a8fd58b055584229c5143e5cc862c250a2b3e0b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_yaml
+task: afrimgsm_wol_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_xho.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..067e88d709cd0f06c217d6823ae2cd99ebd5f003
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_yaml
+task: afrimgsm_xho_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dd3f5ca74a7c6d4dcba9daab9d8c7653b9c8e6e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yaml
@@ -0,0 +1,34 @@
+tag:
+    - afrimgsm_tasks
+    - afrimgsm_tasks_prompt_3
+dataset_path: masakhane/afrimgsm
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Solve the following math question \n\nQuestion: {{question}} \nAnswer: "
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yor.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d64481065491583c980a83d9ea40507b8830552
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_yaml
+task: afrimgsm_yor_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_zul.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b94a2c13dae19e6ed50ab49a321b1f73d326a97
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_3/afrimgsm_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_yaml
+task: afrimgsm_zul_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_amh.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00e45eb4d433718f0ee36896297a20e3aced9dc6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_amh_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_eng.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e3abef526bb0571d2a6fbac07a2abeff6acb3a8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_eng_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cda1994b4dd2b18fb18e0200342a58c1e42d47b8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_fra.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b7e5c155519d43922d3391bfe903ab8720d9ddbf
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_fra_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_hau.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e6ad1a00dce65e79adac55593e53e9683b48decd
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_hau_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d214574ade50c17d374e7daad4b4a8e1670db78
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_kin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2c7cb9ffe60577d49d6aa87c191e0fb81c974ac
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_kin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44d2d2f39bc83011268c1bcffaa4ea8564e3b5a4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_lin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lug.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8a12945fd73da0abe42bc7504ec5a861ef86a8d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_lug_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_orm.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dcf7123f8e45f9d5d32ffc97f5038c29d946958d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_orm_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sna.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d3414e3ca1273567847599c244e266e66e495dc
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_sna_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sot.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac0017a36b03a55102fab40f5c6a3cb2d6fffa84
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_sot_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_swa.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..043311d3e634759e7b08892126dfc64b94e39310
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_swa_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_twi.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3e030d0bc072ba50afa51a988ee0fa4d37f8e7f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_twi_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_vai.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2dbe04c16eab6ea207a384c01860cc2555ffb6bb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_vai.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_vai_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_wol.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55f546ab101fb6ee26158d5803fe1ad80f3cb8e4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_wol_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_xho.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a759549811b8e6f009ed49c7b4bd16bc46163d8f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_xho_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f34f774e9bc1cf7fb925eac9fd55b123389b738
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yaml
@@ -0,0 +1,33 @@
+tag:
+    - afrimgsm_tasks
+    - afrimgsm_tasks_prompt_4
+dataset_path: masakhane/afrimgsm
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yor.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd4d01bbf4f5ddb22b8057f3ffd99996c82b92d1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_yor_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_zul.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5151f266e673a0106c64c548f53d2b2df18f7896
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_4/afrimgsm_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_zul_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_amh.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7ad215fac134e19c62187205f141a73d1978e22
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "For mathematical questions provided in Amharic language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_amh_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_eng.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4de5e95948fd28d990f224c9cb0f6e82b6f5cf5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "For mathematical questions provided in English language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_eng_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ewe.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdb3c4f278e1e31e5c20cb399df2a4a374aaa253
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ewe.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "For mathematical questions provided in Ewe language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_fra.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a93e79ec90099beb8c50baac1503bf824cb9bcda
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_fra.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "For mathematical questions provided in French language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_fra_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_hau.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74fabdee06aa0669ada4ba5fe46bd9a6ad6bdd32
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "For mathematical questions provided in Hausa language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_hau_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ibo.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2dd77f238af5a67eccad3e05f6add96c3d1f321
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "For mathematical questions provided in Igbo language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_kin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba3f3c2f2b264b333a6fe1903c6f84d6d6000692
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "For mathematical questions provided in Kinyarwanda language. Supply\
+  \ the accurate numeric answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_kin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lin.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74131916ab079d9c1bee95d169517661e4d11b31
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "For mathematical questions provided in Lingala language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_lin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lug.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b92bc4e6574ac7df4aad89c143687747ed5d9863
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "For mathematical questions provided in Luganda language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_lug_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_orm.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c33dd44cc0019f3485ca72368c767b9161ec983
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "For mathematical questions provided in Oromo language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_orm_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sna.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2518c36981f50157890029d3fb49d596ea688b6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "For mathematical questions provided in chiShona language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_sna_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sot.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..06cb1b05569674070ee6915f9acf6bd32c6ed72a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "For mathematical questions provided in Sesotho language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_sot_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_swa.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a08c8e3a98efcc19a3f48ec4dc46d220fd7a9ab
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "For mathematical questions provided in Swahili language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_swa_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_twi.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54de3ce507d4f5f4b0f098ec8d9a71e37fb75fc4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "For mathematical questions provided in Twi language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_twi_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_vai.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab3337a7eaf4f7f3a4d06945fd56b78b7d74763f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_vai.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "For mathematical questions provided in Vai language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_vai_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_wol.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e711e8efa4b2e854d9924f953dce931ea3a0ba1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "For mathematical questions provided in Wolof language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_wol_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_xho.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..728cacf881aef3014669a43c2ffc5316664decad
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "For mathematical questions provided in isiXhosa language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_yaml
+task: afrimgsm_xho_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca8bb03f88f2a98928fd21794753f0ff990bb309
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yaml
@@ -0,0 +1,33 @@
+tag:
+    - afrimgsm_tasks
+    - afrimgsm_tasks_prompt_5
+dataset_path: masakhane/afrimgsm
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yor.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd0bea64173d14db8bc6d472bb685ee9f7b420a8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "For mathematical questions provided in Yoruba language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_yor_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_zul.yaml b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb8474e31f328120b855c6e45d5340de8bd6c266
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct/prompt_5/afrimgsm_zul.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "For mathematical questions provided in Zulu language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_yaml
+task: afrimgsm_zul_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/afrimgsm_cot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/afrimgsm_cot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d07832b4ddb9d7b1306e36cc5c39f11ee840661b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/afrimgsm_cot.yaml
@@ -0,0 +1,9 @@
+group: afrimgsm_cot-irokobench
+task:
+  - afrimgsm_cot_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_amh.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c9f0d9311aa462f2f4c3e9d38c5d0407ae7b72d7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_amh_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_eng.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57c0e564b1b594fe21203a0d73737301167b63cb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_eng_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55fdff7c365f895a4835b6234a7027a28b7c42a3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_fra.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..717a45d98beabb763984f030350252b6d3424747
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_fra_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_hau.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f42e0ee559e562a934c3151cc9fd535442c6cf28
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_hau_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dfabc3e319236dd2112ab74bbb5d1181f63d8d55
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_kin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55d20e011aa15f290f292411604391e2c761053f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_kin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ecbf38d859c25896eb039d87ed9b36a4797a77de
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lug.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..033cbce071fed9d337fecfc002b3988671cafe9a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lug_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_orm.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ce25aee61aa076b7c250c00a3a49f552c82f9d3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_orm_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sna.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fae029f19280d7bf4f0928a63022c6deeae09f6d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sna_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0d1207791575accf84f4f6839e8b41f688ba0d9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sot_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_swa.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da4e39cfc0a7b28c341ba8557971bcedd944e9d4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_swa_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_twi.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24f553497c41bb4c42b99d9ba33abc21d42ebf15
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_twi_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_vai.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc63717012dd2df5a3965b352363ac9a2457372f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_vai_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_wol.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c86b09d6a9961af49a771afd9ef62f991edc8084
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_wol_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_xho.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f03080c3c6c986c760578e766e8d23c2e58a17f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_xho_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ab733bf114ba32013ab433ca74a1f05b66f8a78
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yaml
@@ -0,0 +1,37 @@
+tag:
+    - afrimgsm_cot_tasks
+    - afrimgsm_cot_tasks_prompt_1
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+training_split: train
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+  - <|eot_id|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yor.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6858ab21d02d14d797ce3704c940fd627feb2be
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_yor_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_zul.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5cacc2be5c426dde5bdfeb5dc5e3e62b20e494b8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_1/afrimgsm_cot_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_zul_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_amh.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d5d43fb8562da74b469cc15636aaac35c082ef2
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_amh_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_eng.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84a6b26dd7d5f17dc76f2bc5c28bfa05d0936805
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_eng_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7ef2907f54397390ff29aa44612d6a24a3a9380
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_fra.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..987ac630ee57139c93588b86dc8ae53bbd6c466e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_fra_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_hau.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..488f693a60c4a31cec8adf1e2665cf7cd6430b18
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_hau_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aefa0aa229981bd07b9e221a66d4ebce08c0f086
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_kin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e183dcd48f80b358ed54bdde2fd8d9e3ac64a9ba
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_kin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..840a99acfacba2dacfe8c2883fed6445373f72e7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lug.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75e8b89276ae9db785fbc75cf403bffb6c6a5f1c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lug_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_orm.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a36d89355bc25c75f7097c371ed685c1f05a8290
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_orm_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sna.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25187ceccadcbb09e0a8bf417d5b2ae4b4572b0d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sna_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22fc718bc20db26210333151faf22c5a5c0bbef7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sot_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_swa.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0d91d0c9e3cebf8352c589c4ab6ac7a30183129
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_swa_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_twi.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..03b59a394366bca2b5241bd3a096b7b274354cad
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_twi_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_vai.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8fa4cf5e2db3121bae1a296472cefecc34c31ceb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_vai_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_wol.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2611de84f42fd9a0804cb155f94b687bab875a81
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_wol_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_xho.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33059776d4a77e62c6a0478efa64ecd8da5c6a0f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_xho_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..505336ba01a57df47f10104bedb8af7288e4d98d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yaml
@@ -0,0 +1,37 @@
+tag:
+    - afrimgsm_cot_tasks
+    - afrimgsm_cot_tasks_prompt_2
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+training_split: train
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Give direct numerical answers for the question provided. \n\nQuestion: {{question}} \nStep-by-Step Answer: "
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+  - <|eot_id|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yor.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..991297c4dd40aaeab196f45fbf6dcb6521432c25
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_yor_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_zul.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..833edbb1a704f59fc39000ccb8447fec46d55849
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_2/afrimgsm_cot_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_zul_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_amh.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00f830a20eb0e580600868cd88ca9d25231352c1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_amh_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_eng.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea0937f233d75fea06dde81274381f2085a4416e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_eng_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dfe111d7fc2b3ebc91463719305e734c02a2360f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_fra.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb82d3a44416adc5abb640d460d49c428e71f1bf
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_fra_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_hau.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3162114b1eeb9a931ad6c006636fa8d57605c272
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_hau_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f46191a331e0498b6a93cdda5f51750b42702423
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_kin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ddc82ee85066be8890b863bc042bc66cc44316e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_kin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..769ae73aa82813832af62ccf69a1e37e0bf688ae
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lug.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e04769a6c77dc035b03e48dfffb6fc2ef90081b6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lug_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_orm.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..79a696581beac371a7afbf71e918d6e6f43263c5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_orm_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sna.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f08d44259f104d9b72c5ed732c0dbb6938524703
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sna_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76501f53d90f79d70f86ec52b8cbe3d329db7b7e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sot_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_swa.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76ea5f96ae6a2618b54a1009684d8e632c9ebd0e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_swa_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_twi.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c45b3f0fcfab17390f077b2d161f76f097dfda33
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_twi_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_vai.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca50c481fd6c8470bc6d61e012969033cbf2bcfb
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_vai_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_wol.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16dbc506ef36a0799834d1cd5af1166d029453d3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_wol_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_xho.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a329b8ebf4487eb5b922f539274b6592d6d2e75f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_xho_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4d3657da5f68eb670173ff86034aa2276c2c0ae
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yaml
@@ -0,0 +1,37 @@
+tag:
+    - afrimgsm_cot_tasks
+    - afrimgsm_cot_tasks_prompt_3
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+training_split: train
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Solve the following math question \n\nQuestion: {{question}} \nStep-by-Step Answer: "
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+  - <|eot_id|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yor.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..003fb63482132f44de4252d96afb25808f99c876
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_yor_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_zul.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c01468ec7f6a5e6d87f132599fc5df4879516c95
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_3/afrimgsm_cot_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_zul_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_amh.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6624ddfe5319d574caf011f2de342bb68d516771
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_amh_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_eng.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa82cb876081070e9a300dd1471f18c78a8cc311
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_eng_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..135bd975b0a9a4a37f2d3fcce07c6d96b9c9579e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_fra.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81a060b2c2b7ce84836f88aac2e5e386c2ad2e6b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_fra_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_hau.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b53dba5852f274ada14954ef6b839f288d1629dd
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_hau_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a4236e1d6a36e7d241c9d65ca1c111b8bdac536
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_kin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51407a6626539a7a79899f5691122c4c5c0881db
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_kin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..248ffeee0130f295668f8b9596dea68b6b077527
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lug.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fbf7c8cd5edf41c05c347423dcb61cb5f84420f3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lug_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_orm.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..218c3f90a1882f702fed06585f30694ef8e9e96b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_orm_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sna.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81e4840a177d442dd62d2fd08a3e3b36458a65b0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sna_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47bcd414523bf5a4b81440e08476c9f9a2d4e794
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sot_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_swa.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e0b57a14edab9695ceda40dc5a1d44fcd0eb2230
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_swa_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_twi.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abdbdec70ee3203fd39cc99293c52aecc88f37e0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_twi_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_vai.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0b7913b381f35893889225d9cfd3aaaf25555c9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_vai.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_vai_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_wol.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa75a3f599284d5dfc9dd58f4647140180e93378
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_wol_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_xho.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c125ebedfab2aa922c27627ec29b582b3d6fa37
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_xho_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..59013d84ed916ab9728f3345f6323b9fbee4c8d6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yaml
@@ -0,0 +1,36 @@
+tag:
+    - afrimgsm_cot_tasks
+    - afrimgsm_cot_tasks_prompt_4
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+training_split: train
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+  - <|eot_id|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yor.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c960b75f60dcfef53fac9f5cff333baeeb00ef2
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_yor_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_zul.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2641b2e58c631b88f85ee8c156a9028c8b319c86
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_4/afrimgsm_cot_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_zul_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_amh.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea5124850e7ce8cdf270c5cd53020c61d9e10491
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "For mathematical questions provided in Amharic language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_amh_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_eng.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b485061e5974c691035eaaf18af67001b921e73
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "For mathematical questions provided in English language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_eng_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e52f43276704281a95bd366f8512ca6c9af3f4c3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "For mathematical questions provided in Ewe language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_fra.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f311e12a3b8bbb77e9f24d11fa15efbea38330f5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "For mathematical questions provided in French language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_fra_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_hau.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..91cc7ace3922ace059d7c4f789d70ba1162d183b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "For mathematical questions provided in Hausa language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_hau_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2c6a5cc7fb2a54afcfdb98b2d176bda42b23aaf
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "For mathematical questions provided in Igbo language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_kin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36c19a99fdf7dfe8410a2d3183e31f633455932c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "For mathematical questions provided in Kinyarwanda language. Supply\
+  \ the accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_kin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lin.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..419da8ab7aebe31e28c40bb9bd80b34e5b87f867
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "For mathematical questions provided in Lingala language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lug.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..918a3e31484a4f42b5c78bd53a5ad2194e23507d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "For mathematical questions provided in Luganda language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_lug_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_orm.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9a448f2571248957e397d2660d6bc019ac9245d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "For mathematical questions provided in Oromo language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_orm_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sna.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..645b2898c4ad5ec017af12c813ea3d27f1231d55
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "For mathematical questions provided in chiShona language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sna_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sot.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0b940d919edb0a710784ffcbdab00110b568a65
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "For mathematical questions provided in Sesotho language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_sot_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_swa.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..093ccfa2f705e7f94cffed63e0406f0653f1c867
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "For mathematical questions provided in Swahili language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_swa_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_twi.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd0436e7e8ad0a80e32554fb9991158fe66df7be
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "For mathematical questions provided in Twi language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_twi_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_vai.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b348be3a2a337a96936ddafe9315fe060dc8516
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_vai.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "For mathematical questions provided in Vai language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_vai_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_wol.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b73863adafc318c81c058efdfb3fd251cb987a95
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "For mathematical questions provided in Wolof language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_wol_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_xho.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b77d56f2115fc188b5d973d5aaeb33b506830b5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "For mathematical questions provided in isiXhosa language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_xho_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de15089149d133ac1f84ee89d1a20634286ed10c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yaml
@@ -0,0 +1,36 @@
+tag:
+    - afrimgsm_cot_tasks
+    - afrimgsm_cot_tasks_prompt_5
+dataset_path: masakhane/afrimgsm
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+training_split: train
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+  - <|eot_id|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yor.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9032313ad485bd0d9e2b90854bb626b432dc1a46
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "For mathematical questions provided in Yoruba language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_yor_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_zul.yaml b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b6ef0037a6bb530304d7fd5031c2f6816d678a3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/direct_cot/prompt_5/afrimgsm_cot_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "For mathematical questions provided in Zulu language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_yaml
+task: afrimgsm_cot_zul_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml
deleted file mode 100644
index f00400d96d15547bb73acd53c84ad5d4ce6f024f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: amh
-doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_amh
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_eng.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_eng.yaml
deleted file mode 100644
index c62bf206a3ff5644c5d213ef394f4f0cbe3667d0..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_eng.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: eng
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_eng
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml
deleted file mode 100644
index ea246f7c16cec59da6562b0e17b43da0268caa0e..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ewe
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_ewe
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml
deleted file mode 100644
index 16bf57b76e4d48384ee909854ce7ac4050215894..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: fra
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_fra
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_hau.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_hau.yaml
deleted file mode 100644
index 2a397baf1e40185883569b53ffc9bb82265b4257..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_hau.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: hau
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_hau
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ibo.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ibo.yaml
deleted file mode 100644
index 9bd7bf62b4c9fed96aa01280c9d157a08cc04efb..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ibo.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ibo
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_ibo
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml
deleted file mode 100644
index 841913b7c689a30833282cd40fdbc6a6db4a3dac..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: kin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_kin
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml
deleted file mode 100644
index 76d7fdb91fb8dd39b23d4c8c5a0513eaa6538a6d..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_lin
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lug.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lug.yaml
deleted file mode 100644
index 84c05bb292fdec783de75f708002ad5e53c3e3fc..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lug.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lug
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_lug
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml
deleted file mode 100644
index e9e5600e99104054e169ef1d29da528ef5a9be39..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: orm
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_orm
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml
deleted file mode 100644
index 058689623d3fa6147743052f840ab25f8ef0bb4f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sna
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_sna
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sot.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sot.yaml
deleted file mode 100644
index ae443f1833c3b248941bd0cdbae2e0a058625d4a..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sot.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sot
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_sot
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_swa.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_swa.yaml
deleted file mode 100644
index 1aa2d07d0e132e0cf2787d75ab6e7281b4302f97..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_swa.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: swa
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_swa
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_twi.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_twi.yaml
deleted file mode 100644
index 2957cb378e5ec6b27f0911eeab048aa91bf40e43..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_twi.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: twi
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_twi
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml
deleted file mode 100644
index 6ecf4c44eff8d04d081a15062272ba168bab7ded..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: wol
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_wol
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml
deleted file mode 100644
index 9dc6691bdee31264bcba551b0288980de24b6e7f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: xho
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_xho
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml
deleted file mode 100644
index 8ef29830fa23b3fa561276bf6472a453c7e80384..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: yor
-doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_yor
diff --git a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml b/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml
deleted file mode 100644
index 24f486e0af03eda4a290eee0881da5a3b07dd96c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: zul
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: cot_yaml
-task: afrimgsm_en_cot_zul
diff --git a/lm_eval/tasks/afrimgsm/en_cot/cot_yaml b/lm_eval/tasks/afrimgsm/en_cot/cot_yaml
deleted file mode 100644
index b4a0071d0e35ecc03d0899541c2fa3a1af9a32a9..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/en_cot/cot_yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# This file will be included in the generated language-specific task configs.
-# It doesn't have a yaml file extension as it is not meant to be imported directly by the harness.
-tag:
-    - afrimgsm
-    - afrimgsm_en_cot
-dataset_path: masakhane/afrimgsm
-dataset_name: null  # Overridden by language-specific config.
-output_type: generate_until
-training_split: train
-test_split: test
-generation_kwargs:
-  until:
-    - "\n\n"
-    - "\n"
-  do_sample: false
-  temperature: 0.0
-target_delimiter: " "
-metric_list:
-  - metric: exact_match
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
-filter_list:
-  - name: "strict-match"
-    filter:
-      - function: "regex"
-        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
-      - function: "take_first"
-  - filter:
-    - function: regex
-      group_select: -1
-      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
-    - function: take_first
-    name: flexible-extract
-metadata:
-  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/gen_utils.py b/lm_eval/tasks/afrimgsm/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecef389f3a4051e57b652f617b19ddd15d3c26ca
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/gen_utils.py
@@ -0,0 +1,122 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # Minimal holder for a single value; not referenced by any other code in this file.
+    def __init__(self, value):
+        self.value = value  # stored as-is, with no validation or conversion
+
+
+def prompt_func(mode, lang):  # Look up the doc_to_text prompt for `mode`; `lang` is interpolated into prompt_5 only.
+    prompt_map = {  # NOTE: only the first prompt_5 fragment is an f-string; every other fragment is a plain literal.
+        "prompt_4": "Answer the given question with the step by step solution appropriate numerical value, ensuring that the response is "
+        "clear and without any supplementary information. \n\nQuestion: {{question}} \nStep by step answer: ",
+        "prompt_5": f"For mathematical questions provided in {lang} language. Supply the accurate step by step answer to the "
+        "provided question. \n\nQuestion: {{question}} \nStep by step answer: ",
+    }  # Plain literals keep "{{question}}" with both braces, so the placeholder is returned unchanged.
+    return prompt_map[mode]  # Raises KeyError for any mode other than "prompt_4" / "prompt_5".
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant ("prompt_N"); files are written to
+        ``{output_dir}/{mode}`` and N > 3 adds a ``doc_to_text`` prompt.
+    :raises FileExistsError: If ``overwrite`` is false and any target file
+        already exists; raised after all languages have been attempted.
+    """
+    err = []
+    languages = {
+        "eng": "English",
+        "amh": "Amharic",
+        "ibo": "Igbo",
+        "fra": "French",
+        "sna": "chiShona",
+        "wol": "Wolof",
+        "ewe": "Ewe",
+        "lin": "Lingala",
+        "lug": "Luganda",
+        "xho": "isiXhosa",
+        "kin": "Kinyarwanda",
+        "twi": "Twi",
+        "zul": "Zulu",
+        "orm": "Oromo",
+        "yor": "Yoruba",
+        "hau": "Hausa",
+        "sot": "Sesotho",
+        "swa": "Swahili",
+        "vai": "Vai",
+    }
+
+    # Normalise first so a trailing separator ("./translate_cot/") does not
+    # leave an empty last path component and misclassify the directory.
+    is_translate = "translate" in os.path.basename(os.path.normpath(output_dir))
+    infix = "translate_" if is_translate else ""
+    # Only prompt_4 and prompt_5 have an entry in prompt_func's table.
+    needs_prompt = int(mode.split("_")[-1]) > 3
+    os.makedirs(f"{output_dir}/{mode}", exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        file_name = f"afrimgsm_cot_{infix}{lang}.yaml"
+        # yaml.dump sorts keys by default, so insertion order is irrelevant.
+        yaml_details = {
+            "include": f"afrimgsm_cot_{infix}yaml",
+            "task": f"afrimgsm_cot_{infix}{lang}_{mode}",
+            "dataset_name": lang,
+        }
+        if needs_prompt:
+            yaml_details["doc_to_text"] = prompt_func(mode, lang_name)
+        try:
+            with open(
+                f"{output_dir}/{mode}/{file_name}",
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    yaml_details,
+                    f,
+                    allow_unicode=True,
+                )
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # store_true with default=True made the flag a no-op
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./translate_cot",  # "translate" in the last path component selects the translate file/task names
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_5",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml
deleted file mode 100644
index 55fbe4bfdb590b6d352b71c16eebefef3cbb3399..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: amh
-doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_amh
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml
deleted file mode 100644
index 1d729a5cab74ddeb5b3e03f97eadef54a5be3a3c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: eng
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_eng
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml
deleted file mode 100644
index 26191dc815bc0747c05af177e38662e4c4581bfb..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ewe
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_ewe
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml
deleted file mode 100644
index 9f0331ee8f3f730372c3eaecb0defe0887bd6502..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: fra
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_fra
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml
deleted file mode 100644
index 850dad6351a693c2a738a0a570e15da8b412a63a..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: hau
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_hau
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml
deleted file mode 100644
index 8b81178cc719c44419e24b5e14fc5c3e61b73a7a..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: ibo
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_ibo
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml
deleted file mode 100644
index 5a8f53e2e7e7449b1db465062bfb8524b94d3c85..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: kin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_kin
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml
deleted file mode 100644
index 58044ee2b887d3a83f9004e303da6c2bc048703f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lin
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_lin
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml
deleted file mode 100644
index 87013c146f2ef8bddee0a82c2c21949bcac549b0..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: lug
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_lug
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml
deleted file mode 100644
index 1dd19325a57022df444f04eba5eb1b3ced117b61..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: orm
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_orm
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml
deleted file mode 100644
index d710b1da339ca0012239993417f83c946a7c3e09..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sna
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_sna
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml
deleted file mode 100644
index 643eaaeef10a1f70b3b7f13b58cb606dd6ae3f73..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: sot
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_sot
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml
deleted file mode 100644
index b882e89c24a75ce06a1790791a084e1c087acc1b..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: swa
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_swa
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml
deleted file mode 100644
index ac946eb7f413d227dfe0fc5b770e0c6c7bc2d159..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: twi
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_twi
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml
deleted file mode 100644
index dbcc6b2e0e553ebe5353abaebbf6030d68c5b024..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: wol
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_wol
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml
deleted file mode 100644
index dfb3d74f40fac640988e1ffba3caf007d56b66ec..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: xho
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_xho
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml
deleted file mode 100644
index 6b4c346ffeeacd42de58efab206db84af0168670..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: yor
-doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_yor
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_zul.yaml
deleted file mode 100644
index 5e79edffadafebb8e31c710e854157046d15b10e..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_zul.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Generated by utils.py
-dataset_name: zul
-doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
-generation_kwargs:
-  do_sample: false
-  until:
-  - 'Question:'
-  - </s>
-  - <|im_end|>
-include: translate_direct_yaml
-task: afrimgsm_translate_direct_zul
diff --git a/lm_eval/tasks/afrimgsm/translate/afrimgsm_tt.yaml b/lm_eval/tasks/afrimgsm/translate/afrimgsm_tt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1cc68abefd07d38ef64c3524337be287a20e779
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/afrimgsm_tt.yaml
@@ -0,0 +1,9 @@
+group: afrimgsm_tt-irokobench
+task:
+  - afrimgsm_tt_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f067e53f525c95640c43cd94f02dfca0e4702a8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_amh_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1420deed028ca24caea7a72b96bcc53494f7e186
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b38e82f252e4379f8332ba7b779617db5d53da24
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_fra_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..768bcab970c2b6af4edbdaa0897ea4f24e1d0eb5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_hau_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5333b163698e15aa1b2548395dfa1069e188b6b1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae231d6da0ca0de8571eaaf38cb62121fe57d125
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_kin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65349c7e66fbda73b3262d21fba18c69a88a318a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7643fc1223f63f698a8ef70835beccea7de9d7a5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lug_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55e1992799923fc30ffea628a64b1d4bcab31bf0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_orm_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f8826ab4d61ab2a2803c73fd770db538a4db0f0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sna_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b206e3fc82f9e519aece099aa2e5d31c783df62
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sot_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3aede319a2ad47e5e0e0a65c18ca8a85da812971
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_swa_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8e23103ee7a482a1f84002c3dc40bc10758e659
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_twi_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b97922fc5d1d051a0a98623a994b695dbf68c86
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_wol_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1abdd50bced23bc6de7768b9f2db931c7f35b4ad
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_xho_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..614510895a32f30082ccd6eb5cbdfa87766c4473
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yaml
@@ -0,0 +1,32 @@
+tag: afrimgsm_tt_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3927ba8e0369a5220df6d72e4bb474b7e8af7ca
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_yor_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a57260d94ac8b83f7371baecab3e822868dc671b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_1/afrimgsm_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_zul_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..49b559be5c1f62ab67498131d29ac0e0091f622d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_amh_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf82f8624adc0bf9b60194b889ee8ccc7df76c70
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..924ac026e258ab6da6bc0c2be9c55e73f48a3457
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_fra_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86d8dbbca635d347640c699db7bde0cb6d950722
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_hau_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..466ced5c435be500ff453e9242097050cfcc587c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53078341b5822305b3cc5de3652f0e582313aff6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_kin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72aa73d209de818f83d596148e756752bc44c754
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..88ae24a26d652729a5c52696f8c34fcec358dfd8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lug_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e2ffcc32241ed36d042591921737f9c91deabcc
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_orm_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..137ccbcd322421d17c4ea369c34f1beac05ce597
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sna_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5bd7e53ca6ec65492978eb7e2bc95a0569b496e8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sot_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5134b3c423c88832393b6eb7b74338b8f7e97807
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_swa_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6135d99ce5181855cb78f1a0beba3f357c7082c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_twi_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db00be881c3170721857cfb3fb685b3adfec4dfc
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_wol_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3be8dd64c7e7aaacebb296c8f51397d4c50fe3ec
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_xho_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63766339e6e0cb5bbb3d9f45d1010d00de0aafd4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yaml
@@ -0,0 +1,34 @@
+tag: afrimgsm_tt_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Give direct numerical answers for the question provided. \n\nQuestion: {{question}} \nAnswer: "
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+should_decontaminate: true
+doc_to_decontamination_query: "Answer: "
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..01a54e15eea260f6a4ae5fa28e0ea4c627dc904c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_yor_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f7e74df5ba87e25f35e0e13762cd2116797fe65
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_2/afrimgsm_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_zul_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04a14a1bf6b69e10873e181d46fcd51e2e901126
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_amh_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3cda09e47c41ecc23f2e15b16c869fd6e3f13d87
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..49c95be2cd94d718bd6cb1754eee6a79ae6f5ae8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_fra_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d16ac8faf84785902baabb92a1a48bcf383a35a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_hau_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2bbb66ff41f6c55799d77a19e29bfd8e01f0d61d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..488061a306ebcbc4e9f53e78214e25d5391475fa
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_kin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..928ba457061168ae1524db65a4f03f6ec202349d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdc0c80788f1e0e0e4db9bc38d77aab7e2f0f8df
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lug_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04ec7565c10592c8ff143d0d2a944986e2870c1f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_orm_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22ab7bde213b00599cee3e97ef8e4995f2ded97a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sna_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..617340d07bdab98b185c8e441e1a2e08bca3930b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sot_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..337ad6e470631085a735ee2133211c8e9258600e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_swa_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb13aba534c504473735b6cb67c90efab5db9095
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_twi_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f759e6aa6aebd9f6c6f82087dc2756d68a71025e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_wol_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50ab80df1d0589e592b072f71628395fe118ef8b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_xho_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..544fa0ccc13fbbeb9eebc4f0eb2cb78b5c68e183
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yaml
@@ -0,0 +1,32 @@
+tag: afrimgsm_tt_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Solve the following math question \n\nQuestion: {{question}} \nAnswer: "
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee1c7917f6356e01b54ad4c545d3f08b2dfcf8a6
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_yor_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3e21704f20ceba6e34273f8a2d5b1d87b648dda
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_3/afrimgsm_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_zul_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60387afce4fa5801492c968fc97a1519fdbeb5a8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_amh_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7633bc3efd95541fd6f30aaa8d469fa993a0375e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8e16ea929f8d5639f388c339671f1253554fd1d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_fra_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9828205094c1b447ce422da748ba347d448f3b0b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_hau_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8acf8d0fb40630e1c19e54ed5b87687f2bf2897
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74ac117344b5625f1a8671cb938e7d911e849eea
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_kin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cf113619e461a91f0a4517dae08109055ed743f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5dffbdb899140911f0dab979581fc7b0d52674bf
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lug_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30f776f4b6c6a05a98b79234dbf45f269170dc0b
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_orm_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63efa2505e80032a7ef347f1f986f0ec0952c07c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sna_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19b86220a5eeb0319c7732add1ed7d969ab72c67
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sot_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20236ae8e25cbcc8cd43dd54157614518eea26d8
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_swa_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5fe7e7475ae49642169275f3aff639ba4d3fbeda
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_twi_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8fb5640def2bb261c461a80b66ba50324231317
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_wol_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdb63749c47b38edfb97d20998f4fb8d4479075d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_xho_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d3903948dd4a8ae2c285e33eb2551f554d2310c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yaml
@@ -0,0 +1,31 @@
+tag: afrimgsm_tt_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5cb74d41b85d71f851f6c3f160edbc85d006151
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_yor_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f0a068e9f662d567bb01a208e46a6e0b2d014d2
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_4/afrimgsm_translate_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Answer the given question with the appropriate numerical value, ensuring\
+  \ that the response is clear and without any supplementary information. \n\nQuestion:\
+  \ {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_zul_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48ca09aaafafe7360ca9b0c2872d99e63ff16619
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "For mathematical questions provided in Amharic language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_amh_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4a254f0a2d766380528dde5d2ad9ac7eb67bb2d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ewe.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "For mathematical questions provided in Ewe language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac62304550bb0f5b8705c7a9ba3f5934278be55d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_fra.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "For mathematical questions provided in French language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_fra_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..695f1f373464fb93811f28644c0b849fe72de9ed
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "For mathematical questions provided in Hausa language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_hau_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7fd530e7409124357c091d42cbaf5608473976c0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "For mathematical questions provided in Igbo language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52ea0a78a2053f7958583167d613ca209b43e22a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "For mathematical questions provided in Kinyarwanda language. Supply\
+  \ the accurate numeric answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_kin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07cf6a6b0e871bbf004d809d4fcff2f7f063a2a9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "For mathematical questions provided in Lingala language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa3461beb8fc3c9f05dd1a58c5ca7e7de4ac6cb3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "For mathematical questions provided in Luganda language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_lug_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1a00385f578feeb5fb5071c7f2835574a68a30e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "For mathematical questions provided in Oromo language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_orm_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7f08a786ecac0379aa660a26e5619055e97063a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "For mathematical questions provided in chiShona language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sna_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b258204f4eaed7d4889c597189ab910956e9dbb5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "For mathematical questions provided in Sesotho language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_sot_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a950c84d3c617353fd9492e6a1d2a028fd836881
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "For mathematical questions provided in Swahili language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_swa_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a0488295249513408b1d33de3a246b663cc523a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "For mathematical questions provided in Twi language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_twi_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..61ffc3f938f8cf13da5bac35bed1c6d2a9323acf
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "For mathematical questions provided in Wolof language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_wol_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c308cc7f57b90ddfe959132e75aad3cc5a0b6f01
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "For mathematical questions provided in isiXhosa language. Supply the\
+  \ accurate numeric answer to the provided question. \n\nQuestion: {{question}} \n\
+  Answer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_xho_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d3903948dd4a8ae2c285e33eb2551f554d2310c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yaml
@@ -0,0 +1,31 @@
+tag: afrimgsm_tt_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+target_delimiter: ""
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2a0a0fd28fc895e25d309a8f5aaf64769e7de6c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "For mathematical questions provided in Yoruba language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_yor_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b52cfb72dfe0f89092718c46e6fd4360a5dd3646
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate/prompt_5/afrimgsm_translate_zul.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "For mathematical questions provided in Zulu language. Supply the accurate\
+  \ numeric answer to the provided question. \n\nQuestion: {{question}} \nAnswer: "
+include: afrimgsm_translate_yaml
+task: afrimgsm_translate_zul_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate/translate_direct_yaml b/lm_eval/tasks/afrimgsm/translate/translate_direct_yaml
deleted file mode 100644
index f9f1c866e8f7ef9ac1153b2248d519aac2a9d1b1..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimgsm/translate/translate_direct_yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# This file will be included in the generated language-specific task configs.
-# It doesn't have a yaml file extension as it is not meant to be imported directly
-# by the harness.
-tag:
-    - afrimgsm
-    - afrimgsm_translate
-dataset_path: masakhane/afrimgsm-translate-test
-dataset_name: null  # Overridden by language-specific config.
-output_type: generate_until
-test_split: test
-generation_kwargs:
-  until:
-    - "\n\n"
-    - "\n"
-  do_sample: false
-  temperature: 0.0
-target_delimiter: " "
-filter_list:
-  - name: remove_whitespace
-    filter:
-      - function: remove_whitespace
-      - function: take_first
-  - filter:
-    - function: regex
-      group_select: -1
-      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
-    - function: take_first
-    name: flexible-extract
-metric_list:
-  - metric: exact_match
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
-metadata:
-  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/afrimgsm_tt_cot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/afrimgsm_tt_cot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d43ddd233b785cbfba006785de6db94bb4eb5d97
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/afrimgsm_tt_cot.yaml
@@ -0,0 +1,9 @@
+group: afrimgsm_tt_cot-irokobench
+task:
+  - afrimgsm_tt_cot_tasks
+aggregate_metric_list:
+  - {metric: exact_match, aggregation: mean,
+     weight_by_size: true,
+     filter_list: [strict-match, flexible-extract]}
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da7764e81c0665c53c129f42d61629460a74ea1e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_amh_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e65e9298656895f4dab45111420da97559d62023
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a16b91ffb250880c9217e63d6e8c1e46c7d4021c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_fra_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bee8575de4f774c0ae7510e3a074023919dbbe3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_hau_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e6f495eaeb6738e0b8b9524341c1bc2453b4c00f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..400bf8887718fbe40d6701cb2121a3cd271c1360
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_kin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22599e98c83a4eae8b0c6b103396195af55fbbee
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lin_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83c9565d54a2786fea141b6775681172cf49b592
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lug_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca19eb14ef29d16affc7efe255e3faa3ae4deb06
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_orm_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e99d3aa7b59d92691fc89687fcf951a993487f89
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sna_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f8fc2ef28d888800670a9f3068f0d58d670d5ec
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sot_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0545cccda86167036dc321b11d29c1ca1ca2542
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_swa_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0b4f9716cb900a71167b85a68ac402470aec3f0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_twi_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_vai.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76c18a3f9172645111dbcb26e0183b6d48f3fc69
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_vai_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee0d6fc9babb8f968cdcbbd460bdb83f14e14c06
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_wol_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f340a46529bd305f4bdc5ea73b7cd148ec6d7d1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_xho_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ad7f0069cd8b63090b625ef103a37154356782c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yaml
@@ -0,0 +1,33 @@
+tag: afrimgsm_tt_cot_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf093766dc687b5a092d984ab8cda6514ffd56a9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_yor_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1bb302a4d0f2268543c228ba13d0744509c4911d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_1/afrimgsm_cot_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_zul_prompt_1
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9f97735373ee9e47f6234b27002f6f12edb1a13
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_amh_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a83764758c3e2540d3a097ed0e0f5ac987604a1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b496c775817645ce477d84749de8e71f0badbc22
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_fra_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1022ae899b0e2413351e01aafef9de08b00688ba
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_hau_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd2a2528ef1c104f1714735f1a5b753c10966607
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4d8a986acd1dd95e9560555b44f2d3f5aed5395d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_kin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70d4032301398b2124ff128ebb9ed1ba4eb0f0ea
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lin_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a774c189513e159a0d9cdf034cd0470cc25d8b84
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lug_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b325b2ce931cbcf05cee50293845043081097387
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_orm_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e85255881b279d8d4f578bfcbfd96355e8af3cd
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sna_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a298b504439fa6c7d8eab548ecf2a0b997eddc9d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sot_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3de9a4c61b8a018007b13e16cd9028d31af92c2
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_swa_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2e1ab61ec9af7af947ae656d2c7069230ee02c5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_twi_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_vai.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9186f1e00c540dc64bb89ca619439e8b927162d5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_vai_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..185b406be03d84beef24b6c6fc453a4518d7a66f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_wol_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52a0e1ca5144bda20184bcc081768098d945a239
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_xho_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad059aead35b933cabaf763d549c59592f006fc7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yaml
@@ -0,0 +1,33 @@
+tag: afrimgsm_tt_cot_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Give direct numerical answers for the question provided. \n\nQuestion: {{question}} \nStep-by-Step Answer: "
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2452b0fae4a9f939ed115736056091302b9cfa78
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_yor_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ce8151b79849bbc85be11e8cd535bf7a1e12ede
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_2/afrimgsm_cot_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_zul_prompt_2
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b627e57564d6839ec2ffde82c0a125e42a5c5b77
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_amh_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52dc345f0cc5f2ad617db14a22327d4b2fc298bd
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2b7582c34e26f4e6e9cd87aa1c675788a23ccae
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_fra_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d57be8c8c4c9dbaebaef523ff4bd9310df1ebc40
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_hau_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..296ea98fc1dd70f50bb72ddb566d738d11d42f68
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b555b3e314cc6b50970f06649e7d1f662370073
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_kin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ace69b273060675411488fa639f261b2fb39f8a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lin_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd25a1661f0720e68e2851701a1a9ed8f0131950
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lug_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..698c1474bd76edf931839d75b3111bafe8b0770c
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_orm_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..354df6bfef4ea01a0b40b6361adb5d21470d395e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sna_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5990be74d1cceb58eff6a4f9648d1f3de0e11d8a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sot_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d86662980bf7078dde5636aba335bab9897619c4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_swa_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78ef85fc25c0da4894843624afad970c9ff572b0
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_twi_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_vai.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25ec4e8fd71e2423370182180efc7b1bc7843e38
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_vai.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: vai
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_vai_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7815a0a5f4b77403334b41baebb2e529eda19d1a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_wol_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e45afd3a0ecc2ce8656e70ebfe29cad1f6ff06ba
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_xho_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0bb7d6661f0b78b5e417269c61f0e7fc028848f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yaml
@@ -0,0 +1,33 @@
+tag: afrimgsm_tt_cot_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: "Solve the following math question \n\nQuestion: {{question}} \nStep-by-Step Answer: "
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39e18cb48d1f214a13f5beb5a1e2c4d4f34855af
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_yor_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08fbc9e15e1d963da67e5723f33b926701ca9503
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_3/afrimgsm_cot_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_zul_prompt_3
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f73f15f9ef2ba8f0d75e0a0a74cbf00e84cf8e5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_amh_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d57247b8b512c60f5536cade1bbc7804083b2f5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a2f70ca6327e79044f7ed1685282ab8803fcc7d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_fra_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5e7903c88c05de0ac942fd0894b523209dad320
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_hau_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf15ed077cd3ff2762deaf532d200e117fb6e9dd
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1b57c1395efb57ed2d8e31df9f2878cfbad59ad
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_kin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81cdb1ff41ede1fd2ceca5c5ebaf099df5df5d45
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lin_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a949b0289211c9443a6a73880598c762a8c5a8f9
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lug_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4deb09238913761e594ec4967a4dabd9d188b02
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_orm_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ebac1993f84026c0823e52fb41c6574e816b4c7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sna_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf83c8f0209f627b01da77a7ab033ab93a276891
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sot_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..87b581b8a5fdb5e9d2d0eede15f5c98b04f88693
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_swa_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..223901eb208af30d0a8dc549f7d021d343e17076
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_twi_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_vai.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92ce3892451070777235d493be10bbe9811ad05d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_vai.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_vai_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c626fde4e599303ff73e490d3cbc38b6335d6755
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_wol_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..285b679cc0a7da31dc3d719918ead217d5287b9e
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_xho_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..241787c7aa25a0ac46e2556efdb8db633e7a0719
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yaml
@@ -0,0 +1,32 @@
+tag: afrimgsm_tt_cot_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f76f4cd109026c0c94a578674c4d8140201e5bfa
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_yor_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7023a5540612c3f7a568313f7d0d26e541839c49
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_4/afrimgsm_cot_translate_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Answer the given question with the step by step solution appropriate\
+  \ numerical value, ensuring that the response is clear and without any supplementary\
+  \ information. \n\nQuestion: {{question}} \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_zul_prompt_4
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_amh.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d64f088f46c537580055f91f3eaa347187531da4
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "For mathematical questions provided in Amharic language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_amh_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ewe.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de4aa48d48be2c3ea31b03cb497c4b881ab09ead
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "For mathematical questions provided in Ewe language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_fra.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5cf15ea1ad1e7937cb46cf7f5c09014e00e7fea7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "For mathematical questions provided in French language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_fra_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_hau.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dfa643c4cf14b0bbd871f6293f5b142a6a0337a
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "For mathematical questions provided in Hausa language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_hau_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ibo.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..959f389070977606a0d330573c4c4015754b80d7
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "For mathematical questions provided in Igbo language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_kin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85ff4196d50b187779c8c39b62cfcb5448ddb258
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "For mathematical questions provided in Kinyarwanda language. Supply\
+  \ the accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_kin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lin.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..87db46c9761e033a42739d1aaaf1d14e51989d14
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "For mathematical questions provided in Lingala language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lin_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lug.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac0fde85c1607b570abb2c0fa602d0f680ef3fe1
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "For mathematical questions provided in Luganda language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_lug_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_orm.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bcf34106d924c07a92e63eaacdd3f112e860c25d
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "For mathematical questions provided in Oromo language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_orm_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sna.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5eac98d4e45e5c199cc5a19286c7d0edcd04a9f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "For mathematical questions provided in chiShona language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sna_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sot.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9fc015cdf75b34a6147183cfd3ad2f8bbe4d4660
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "For mathematical questions provided in Sesotho language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_sot_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_swa.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..179af86738feaf0656c65e260cf465284c9bc3e5
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "For mathematical questions provided in Swahili language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_swa_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_twi.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ebb680a6f5e54727adabbdd517819e90ca9c2b96
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "For mathematical questions provided in Twi language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_twi_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_vai.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_vai.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d2848648e5a752973ca5a21a38b0a2fb82b4127
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_vai.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: vai
+doc_to_text: "For mathematical questions provided in Vai language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_vai_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_wol.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..799cc29fbbca2a4a328cec6bf679cc68ad51139f
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "For mathematical questions provided in Wolof language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_wol_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_xho.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7969fdbabd911f8fe4ffdfb9f7e47364c3a80857
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "For mathematical questions provided in isiXhosa language. Supply the\
+  \ accurate step by step answer to the provided question. \n\nQuestion: {{question}}\
+  \ \nStep by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_xho_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..241787c7aa25a0ac46e2556efdb8db633e7a0719
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yaml
@@ -0,0 +1,32 @@
+tag: afrimgsm_tt_cot_tasks
+dataset_path: masakhane/afrimgsm-translate-test
+dataset_name: null  # Overridden by language-specific config.
+output_type: generate_until
+test_split: test
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+filter_list:
+  - name: "strict-match"
+    filter:
+      - function: "regex"
+        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
+      - function: "take_first"
+  - filter:
+    - function: regex
+      group_select: -1
+      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+    - function: take_first
+    name: flexible-extract
+metadata:
+  version: 2.0
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yor.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d05de223110d6e434bcf75bb2f9cf71957b76d3
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "For mathematical questions provided in Yoruba language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_yor_prompt_5
diff --git a/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_zul.yaml b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68329068941f34bf7e53739334e8101c46a0ecfe
--- /dev/null
+++ b/lm_eval/tasks/afrimgsm/translate_cot/prompt_5/afrimgsm_cot_translate_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "For mathematical questions provided in Zulu language. Supply the accurate\
+  \ step by step answer to the provided question. \n\nQuestion: {{question}} \nStep\
+  \ by step answer: "
+include: afrimgsm_cot_translate_yaml
+task: afrimgsm_cot_translate_zul_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..202c31825bfcdaa8ea974e8f51444bc864ed4306
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu.yaml
@@ -0,0 +1,13 @@
+group: afrimmlu-irokobench
+task:
+  - afrimmlu_tasks_prompt_1
+  - afrimmlu_tasks_prompt_2
+  - afrimmlu_tasks_prompt_3
+  - afrimmlu_tasks_prompt_4
+  - afrimmlu_tasks_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
deleted file mode 100644
index 53acc4c83206969667d4792eb35ddf8645fcf5ae..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-tag:
-  - afrimmlu
-  - afrimmlu_direct
-task: null
-dataset_path: masakhane/afrimmlu
-dataset_name: null
-output_type: multiple_choice
-validation_split: validation
-test_split: test
-fewshot_split: validation
-doc_to_text: !function utils.doc_to_text
-doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
-doc_to_choice: !function utils.doc_to_choice
-should_decontaminate: true
-doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
-metric_list:
-  - metric: f1
-    aggregation: !function utils.weighted_f1_score
-    # aggregation: mean
-    average: weighted
-    hf_evaluate: true
-    higher_is_better: True
-    ignore_case: true
-    ignore_punctuation: true
-    regexes_to_ignore:
-      - ","
-      - "\\$"
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
-    regexes_to_ignore:
-      - ","
-      - "\\$"
-metadata:
-  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml
deleted file mode 100644
index aa60c668fd9b2879f020f990655e7eedce2b3a81..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_amh.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: amh
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_amh
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
deleted file mode 100644
index a1e647cdf1d0278c73744288fa61cd7709550231..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: eng
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_eng
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
deleted file mode 100644
index 1cc45ddc0e50d1bb4992aecdb4f5208dbb77881b..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: ewe
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_ewe
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
deleted file mode 100644
index e6adb6c8aa4e50c6efca737792907cb658c30627..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: fra
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_fra
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
deleted file mode 100644
index 9cc9a1ae7acc7318faf68a241f68b0d5cba93978..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: hau
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_hau
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
deleted file mode 100644
index 6abb2c4a467986751376679b31ec5db8a7af0886..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: ibo
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_ibo
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
deleted file mode 100644
index 2f81f709c4812db3ecfa71bbb9cfb74099a10aab..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: kin
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_kin
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
deleted file mode 100644
index 55363ed93772284fc54386592ae827c03246d681..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: lin
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_lin
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
deleted file mode 100644
index 0d484427eda8fcd4b645b3f90b191f075cb88ce9..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: lug
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_lug
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
deleted file mode 100644
index 763eb8a75f894797185436d3a83c9fd57393f4ac..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: orm
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_orm
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
deleted file mode 100644
index ed9e69af392838290bac14d08259585c56daace8..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: sna
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_sna
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
deleted file mode 100644
index acdba0fdccf12f73004669dbed1b7cbee9ded24f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: sot
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_sot
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
deleted file mode 100644
index c1aa82b0b1d44314c337b904c346806cb3c720a4..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: swa
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_swa
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
deleted file mode 100644
index 2695d4a156d4b59dbb2c483ebdbbc16e01c7a415..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: twi
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_twi
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
deleted file mode 100644
index 027f837637fb061d227d33e925d3030af51c3cbe..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: wol
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_wol
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
deleted file mode 100644
index 8e0c12972d01be342a6838b0eab4c1f609d6dc48..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: xho
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_xho
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
deleted file mode 100644
index 2a9f7645c2259a607f871e54b07c14ab962ed04c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: yor
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_yor
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
deleted file mode 100644
index 9d8d3b415b44ef4ab0b762f411006c7b00d54226..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: zul
-include: afrimmlu_common_yaml
-task: afrimmlu_direct_zul
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct
new file mode 100644
index 0000000000000000000000000000000000000000..a3e17f711f6eac83c52fad1d3f0314a01f08d169
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct
@@ -0,0 +1,37 @@
+tag:
+    - afrimmlu_tasks
+    - afrimmlu_tasks_prompt_1
+    - afrobench_mmlu_tasks
+dataset_path: masakhane/afrimmlu
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a26369b36ee47ed6ac21c448c316acaf90af749
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_direct
+task: afrimmlu_direct_amh_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18a34c7b719cdef3254e2472399b7fdd3121d543
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimmlu_direct
+task: afrimmlu_direct_eng_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e85bd7dc7dcc4fa2f5ea90f4f540a0a75b160dbd
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_direct
+task: afrimmlu_direct_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e8a2875e71f43dbdd148331d24e6440f92ad71f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_direct
+task: afrimmlu_direct_fra_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b438ea3198a18caf74d44a97f2d4752337edc082
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_direct
+task: afrimmlu_direct_hau_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b08d48e0a3c42e123c081935d5ffcc71e1c56c7
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_direct
+task: afrimmlu_direct_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00d82dfa57e6a77d52f476ae78c54edbc677628d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_direct
+task: afrimmlu_direct_kin_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7059c941d2dffd27c8eda15dc1fc087a626455a2
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_direct
+task: afrimmlu_direct_lin_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3301647298d216de664ff07e2c8a10e134afe388
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_direct
+task: afrimmlu_direct_lug_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5047ae98abd0d3ea8ebeb98233ae4fb1ebb42dab
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_direct
+task: afrimmlu_direct_orm_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..17222f95253270fdcff74177fbd0474cb75660b3
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_direct
+task: afrimmlu_direct_sna_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c62ce9bf4957545dc39f96d7bd6dc60ce60e868a
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_direct
+task: afrimmlu_direct_sot_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5ebed9f96e857356adf2ccaa2de2cf818874e71
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_direct
+task: afrimmlu_direct_swa_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb270c949a7a250e4f4810a5086a80ff22716f1f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_direct
+task: afrimmlu_direct_twi_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ccbc47cd02c5de0c886deed9f4549141884eac8
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_direct
+task: afrimmlu_direct_wol_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e30d2017740585b5c675ec898fa9d9512e4ac52
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_direct
+task: afrimmlu_direct_xho_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3de56f8d3c9fe1f283712ea359ead734a413d93
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_direct
+task: afrimmlu_direct_yor_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86c56fec097dc4c636070f6c0ab0750a23bb2435
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_1/afrimmlu_direct_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_direct
+task: afrimmlu_direct_zul_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/direct/utils.py b/lm_eval/tasks/afrimmlu/direct/prompt_1/utils.py
similarity index 100%
rename from lm_eval/tasks/afrimmlu/direct/utils.py
rename to lm_eval/tasks/afrimmlu/direct/prompt_1/utils.py
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct
new file mode 100644
index 0000000000000000000000000000000000000000..fefabf7e0b52e644d1e9d922c8f899607eab6075
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct
@@ -0,0 +1,37 @@
+tag:
+    - afrimmlu_tasks
+    - afrimmlu_tasks_prompt_2
+    - afrobench_mmlu_tasks
+dataset_path: masakhane/afrimmlu
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85d85171bfe9d84205a9ab218ed496aed1eecf73
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_direct
+task: afrimmlu_direct_amh_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c46eca5e68356372fc43c1b1908e45667ff05d12
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimmlu_direct
+task: afrimmlu_direct_eng_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..26acfcfa93b3019a746a5e9a78e4cdb48871c978
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_direct
+task: afrimmlu_direct_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47f0bfb14b6cd7eaf34618eb0709f9f3f0c9b666
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_direct
+task: afrimmlu_direct_fra_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29b4a4d2029e945c4bf52654a819dcb89b898431
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_direct
+task: afrimmlu_direct_hau_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cf7db0e4c0585c2cde8f0645e64438546ba5818
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_direct
+task: afrimmlu_direct_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce7c2e896509b874459deb156f2ba34287a908c9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_direct
+task: afrimmlu_direct_kin_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51fcea62af8ca4fa64643ef4ea170444ce25beef
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_direct
+task: afrimmlu_direct_lin_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4c57ae36fbad9a75737562b6cb619b53e933c34
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_direct
+task: afrimmlu_direct_lug_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..494d4240693fe9907f965cd8ad5ccc71fcfc2868
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_direct
+task: afrimmlu_direct_orm_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7706ad64ccc0baef8fc4964e61871805187db548
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_direct
+task: afrimmlu_direct_sna_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..353bd2574657f0b7f49d0be77edd777893ac549b
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_direct
+task: afrimmlu_direct_sot_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54a16c6c2f5839c6aec545c74f6a5df3293938df
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_direct
+task: afrimmlu_direct_swa_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bb35bd5f9ad784fbf2101e3ce14e82710c75858
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_direct
+task: afrimmlu_direct_twi_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..963f7cd2cdee47fda381af3cbe1a63c56601b709
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_direct
+task: afrimmlu_direct_wol_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9da0589a8bfe5791ff3764fd26e1f375a836b701
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_direct
+task: afrimmlu_direct_xho_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39b365418eda1be6e4c344d4e32717045d2eafda
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_direct
+task: afrimmlu_direct_yor_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8766392a0dc7e8abd5b8145f37bcd13f424a8b6a
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/afrimmlu_direct_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_direct
+task: afrimmlu_direct_zul_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_2/utils.py b/lm_eval/tasks/afrimmlu/direct/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0cfb334c27cfe4c5bbb1ff7126215c0ea9130c9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_2/utils.py
@@ -0,0 +1,47 @@
+import ast
+
+# Not used in this module directly: the task config resolves
+# `utils.weighted_f1_score` through this module's namespace.
+from lm_eval.utils import weighted_f1_score  # noqa: F401
+
+
+def doc_to_choice(doc):
+    """Return the answer options parsed from ``doc["choices"]``.
+
+    The field is expected to hold the options serialized as a Python literal
+    (``doc_to_text`` indexes entries 0-3 of the parsed value).
+
+    NOTE(security): the text comes from the dataset, so it is parsed with
+    ``ast.literal_eval`` instead of ``eval``. Only literals are accepted;
+    anything else raises ``ValueError``/``SyntaxError`` rather than executing.
+    """
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Render the prompt for ``doc``.
+
+    Fills the template with ``doc["subject"]``, ``doc["question"]`` and the
+    first four parsed entries of ``doc["choices"]``.
+    """
+    output = """As an expert in {subject}, choose the most accurate answer to the question below.
+Your goal is to select the correct option 'A', 'B', 'C', or 'D' by understanding the nuances of the topic.
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    # Parse once via the shared helper so both functions agree on the format.
+    choices = doc_to_choice(doc)
+    return output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct
new file mode 100644
index 0000000000000000000000000000000000000000..fb2fd165fcba0457c82e825afa5d8252546dc09c
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct
@@ -0,0 +1,37 @@
+tag:
+    - afrimmlu_tasks
+    - afrimmlu_tasks_prompt_3
+    - afrobench_mmlu_tasks
+dataset_path: masakhane/afrimmlu
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7c28f20b09193f8a0a5c1c0f4ffd8ae59312a08
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_direct
+task: afrimmlu_direct_amh_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83f7cfcb32c1d85061a3d9b6e1cca169a61d4ff0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimmlu_direct
+task: afrimmlu_direct_eng_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..351bdf330c4b30d85448e45b9233aaf6cb704c4b
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_direct
+task: afrimmlu_direct_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..691978187578805d95bc215c8d678273f02d343d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_direct
+task: afrimmlu_direct_fra_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90521523bef4afe8420fc579108dd2353afb49f3
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_direct
+task: afrimmlu_direct_hau_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43a88fe6c13d804531aa7e251fe86c0102562bc9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_direct
+task: afrimmlu_direct_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..977f3ab259efac5e6afcccdfd44e04279651ad18
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_direct
+task: afrimmlu_direct_kin_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d25584a3fe0d9c7a73e13ab7a3f1b8652616efd
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_direct
+task: afrimmlu_direct_lin_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b4da1a7f550f66c1b3f084879413d2d9fc13641
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_direct
+task: afrimmlu_direct_lug_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2738f980d41cea65fa790aa16232a0f6a7584226
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_direct
+task: afrimmlu_direct_orm_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..063d111ac4aa0c8625d8615e7b13c1d10ac906fb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_direct
+task: afrimmlu_direct_sna_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cf6e66d4fb91e3e98ed5b0fe955379bcb31bf26
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_direct
+task: afrimmlu_direct_sot_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e90204d40f2fef22b46b190b552cd9f9fcb777b0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_direct
+task: afrimmlu_direct_swa_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..719ebe9002cc9fab19dfa793153d779ad8ffbee6
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_direct
+task: afrimmlu_direct_twi_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f0f1d0d709b8f0dddd5708b66f6d27a122984d0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_direct
+task: afrimmlu_direct_wol_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8fc1af4d171021226243c69a59b44363b4a16639
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_direct
+task: afrimmlu_direct_xho_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a641b03ae173f2342e8d7178119e68fea2e5f000
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_direct
+task: afrimmlu_direct_yor_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c6b493d34a99a4250676c2f0130ceff6b4ea4f8
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/afrimmlu_direct_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_direct
+task: afrimmlu_direct_zul_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_3/utils.py b/lm_eval/tasks/afrimmlu/direct/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc3da2e29667b4b25f68757e2169a5c8aa0c8dea
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_3/utils.py
@@ -0,0 +1,32 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer options parsed from the stringified ``doc["choices"]``."""
+    return eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+
+
+def doc_to_text(doc):
+    """Format the subject-expert prompt from ``doc``'s subject, question and first four choices."""
+    template = """You are a subject matter expert in {subject}.
+
+  Utilizing your expertise in {subject}, answer the following multiple-choice question
+  by picking 'A', 'B', 'C', or 'D'.
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    options = eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+    return template.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=options[0],
+        choice2=options[1],
+        choice3=options[2],
+        choice4=options[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct
new file mode 100644
index 0000000000000000000000000000000000000000..c15b7b2fc3991517b15f2c370a246adb907f2e52
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct
@@ -0,0 +1,37 @@
+tag:
+    - afrimmlu_tasks
+    - afrimmlu_tasks_prompt_4
+    - afrobench_mmlu_tasks
+dataset_path: masakhane/afrimmlu
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc862dc2327d23f394742ee51031e37fc7c97ff1
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_direct
+task: afrimmlu_direct_amh_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..69baef502b78bce307b77a7c44c8c4323ebc1102
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimmlu_direct
+task: afrimmlu_direct_eng_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5af1074f4b993daa4f2468f60baf83b48bfc470
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_direct
+task: afrimmlu_direct_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d1f94eea1e47bd9442dbc76181069bf47ac29da1
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_direct
+task: afrimmlu_direct_fra_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca8f7c5ed0ae15bc5a5e96c776f2251f2cff06fa
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_direct
+task: afrimmlu_direct_hau_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a181d07cc6a81aaf42308fc137c2241a0d8d444
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_direct
+task: afrimmlu_direct_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f86122466a7db1fdc06a2352385fdc1fc78bd69
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_direct
+task: afrimmlu_direct_kin_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c7d3ecf7a86a68248a49d5fc97947ca8da69b0b
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_direct
+task: afrimmlu_direct_lin_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..467201319f12257549de2fa3c260591dee13f311
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_direct
+task: afrimmlu_direct_lug_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e52668253d495c2583d9f5e964dc73c6850a5729
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_direct
+task: afrimmlu_direct_orm_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..af29225a1ca8c63c042d661ce5dc5331a44ed28a
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_direct
+task: afrimmlu_direct_sna_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0342dc10b71f69880b3a3352f9d9def10f9815c8
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_direct
+task: afrimmlu_direct_sot_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec9a3525f534ac5ea7ed6817d8f52bc67b57c445
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_direct
+task: afrimmlu_direct_swa_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83dc916c68120a6a28234dfaad48b71c9cbfdba3
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_direct
+task: afrimmlu_direct_twi_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e656af2c6697004f7ea94ff638b8b8d4f9f8d549
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_direct
+task: afrimmlu_direct_wol_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab23d9346400d5ec0eedb7f91f530a04499f163c
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_direct
+task: afrimmlu_direct_xho_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dd0254819a9ef2b6b2b795bbcfad7ed8ef1c314
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_direct
+task: afrimmlu_direct_yor_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..98a0937fc74a67a0fae7faa39164f10c288aa3f7
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/afrimmlu_direct_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_direct
+task: afrimmlu_direct_zul_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_4/utils.py b/lm_eval/tasks/afrimmlu/direct/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..29c23b7f856b2ab4ead359cafbbf404241e53ffb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_4/utils.py
@@ -0,0 +1,28 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer options parsed from the stringified ``doc["choices"]``."""
+    return eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+
+
+def doc_to_text(doc):
+    """Format the critical-analysis prompt from ``doc``'s question and first four choices."""
+    template = """Analyze each question critically and determine the most correct option based on your understanding of the subject matter
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    options = eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+    return template.format(
+        question=doc["question"],
+        choice1=options[0],
+        choice2=options[1],
+        choice3=options[2],
+        choice4=options[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct
new file mode 100644
index 0000000000000000000000000000000000000000..3da1eb827af65c9bcb69dd4af7eab06df848ade2
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct
@@ -0,0 +1,37 @@
+tag:
+    - afrimmlu_tasks
+    - afrimmlu_tasks_prompt_5
+    - afrobench_mmlu_tasks
+dataset_path: masakhane/afrimmlu
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_amh.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cff031d7936a5b82bacf175d8d17e54a51d7fe92
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_direct
+task: afrimmlu_direct_amh_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52f317982b39dd21a3e11ce6695b95af9ef8f8df
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrimmlu_direct
+task: afrimmlu_direct_eng_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cef2f86599b589c38e7fb30723a3024b3fcfeffe
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_direct
+task: afrimmlu_direct_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..042c0bbbf0c5afc60776127770b88d15c7c7c5ea
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_direct
+task: afrimmlu_direct_fra_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd507182558a8884859713d4fbcf356898d9176c
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_direct
+task: afrimmlu_direct_hau_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e9839001ed95c56ae13b1fa97466fbbb39c4acb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_direct
+task: afrimmlu_direct_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d157559f8c8ea7172bdf64a78506e02830ee633
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_direct
+task: afrimmlu_direct_kin_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6eca1f8e7ce1b442be6b62ec924143d736f97c62
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_direct
+task: afrimmlu_direct_lin_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..854b160dc4dc6bc4192ab6c2f73a0e8286da6376
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_direct
+task: afrimmlu_direct_lug_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9592e585bbe1c62b6b4f2eb25ab02facda7bc242
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_direct
+task: afrimmlu_direct_orm_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51d05c686db303d55507c16f8a31c56ec3222f29
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_direct
+task: afrimmlu_direct_sna_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cce0e4607d45d5e302f7642b2343a7b1a1dcf991
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_direct
+task: afrimmlu_direct_sot_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1cd2672b09792c6fd0cf6a9c3c77b83ac5cdbcb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_direct
+task: afrimmlu_direct_swa_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e2e258c6d020e6599fe3d8fd92388958fce14b1
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_direct
+task: afrimmlu_direct_twi_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d721871b35a08b564c04af09ca32349a4433bf93
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_direct
+task: afrimmlu_direct_wol_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c1507260fd7c79cfde49981a52fffa5b5cc89d2
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_direct
+task: afrimmlu_direct_xho_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f528abb1018753945b954945138084b2d7327ce
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_direct
+task: afrimmlu_direct_yor_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec83abebdb65535e345a1c488c2e2999f798d373
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/afrimmlu_direct_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_direct
+task: afrimmlu_direct_zul_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/direct/prompt_5/utils.py b/lm_eval/tasks/afrimmlu/direct/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a47ceca967c136c7df7132d826ac51af26039722
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/direct/prompt_5/utils.py
@@ -0,0 +1,29 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer options parsed from the stringified ``doc["choices"]``."""
+    return eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+
+
+def doc_to_text(doc):
+    """Format the proficiency prompt from ``doc``'s subject, question and first four choices."""
+    template = """Given your proficiency in {subject}, please answer the subsequent multiple-choice question with 'A', 'B', 'C', or 'D'.
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    options = eval(doc["choices"])  # NOTE(review): eval on dataset text; prefer ast.literal_eval
+    return template.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=options[0],
+        choice2=options[1],
+        choice3=options[2],
+        choice4=options[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/gen_utils.py b/lm_eval/tasks/afrimmlu/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a195b6b5852d35042c14632597762a3965faae07
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/gen_utils.py
@@ -0,0 +1,103 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # plain holder for ``value``; not referenced elsewhere in this module
+    def __init__(self, value):  # value: stored as-is, no validation or conversion
+        self.value = value
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to; when its last path
+        component is ``translate`` the ``afrimmlu_translate`` naming is used.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant (e.g. ``prompt_1``); names the sub-directory and task suffix.
+    :raises FileExistsError: If ``overwrite`` is false and any target file exists.
+    """
+    # Language code -> display name; only the codes (keys) are used below.
+    languages = {
+        "eng": "English",
+        "amh": "Amharic",
+        "ibo": "Igbo",
+        "fra": "French",
+        "sna": "chiShona",
+        "wol": "Wolof",
+        "ewe": "Ewe",
+        "lin": "Lingala",
+        "lug": "Luganda",
+        "xho": "isiXhosa",
+        "kin": "Kinyarwanda",
+        "twi": "Twi",
+        "zul": "Zulu",
+        "orm": "Oromo",
+        "yor": "Yoruba",
+        "hau": "Hausa",
+        "sot": "Sesotho",
+        "swa": "Swahili",
+    }
+    # normpath drops a trailing separator so "./translate/" is still recognised.
+    is_translate = os.path.basename(os.path.normpath(output_dir)) == "translate"
+    prefix = "afrimmlu_translate" if is_translate else "afrimmlu_direct"
+    target_dir = f"{output_dir}/{mode}"
+    os.makedirs(target_dir, exist_ok=True)  # loop-invariant, so created once
+
+    err = []
+    for lang in languages:
+        file_name = f"{prefix}_{lang}.yaml"
+        yaml_details = {
+            "include": prefix,
+            "task": f"{prefix}_{lang}_{mode}",
+            "dataset_name": lang,
+        }
+        try:
+            # "x" mode refuses to clobber an existing file and raises FileExistsError.
+            with open(
+                f"{target_dir}/{file_name}",
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            # Collect every clash so they can be reported together below.
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # opt-in: without the flag, existing files are reported, not replaced
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./direct",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_4",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":  # run the generator only when executed as a script
+    main()
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
deleted file mode 100644
index fad9467833b401251480e02d8449ff97c1280a3a..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-tag:
-  - afrimmlu_translate
-task: null
-dataset_path: masakhane/afrimmlu-translate-test
-dataset_name: null
-output_type: multiple_choice
-test_split: test
-doc_to_text: !function utils.doc_to_text
-doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
-doc_to_choice: !function utils.doc_to_choice
-should_decontaminate: true
-doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
-metric_list:
-  - metric: f1
-    aggregation: !function utils.weighted_f1_score
-    # aggregation: mean
-    average: weighted
-    hf_evaluate: true
-    higher_is_better: True
-    ignore_case: true
-    ignore_punctuation: true
-    regexes_to_ignore:
-      - ","
-      - "\\$"
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-    ignore_case: true
-    ignore_punctuation: true
-    regexes_to_ignore:
-      - ","
-      - "\\$"
-metadata:
-  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_amh.yaml
deleted file mode 100644
index ac88ffa9500701e8bbb2b5c64d1f4c9f2ec856bc..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_amh.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: amh
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_amh
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
deleted file mode 100644
index 0be98beedd86223dd14c1abbf51dbe93c7ff658a..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: eng
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_eng
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
deleted file mode 100644
index 624342b91f383479c7ef340bfb80ce305608cf61..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: ewe
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_ewe
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
deleted file mode 100644
index c4fd7e1fc774b6dd987e6c35d3a3fadbf6d577c4..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: fra
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_fra
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
deleted file mode 100644
index aaeb415fa2a00516ea3a84133066b7eae009f017..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: hau
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_hau
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
deleted file mode 100644
index 93fb24e8c3fa799a41c022a708748bb5e7341631..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: ibo
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_ibo
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
deleted file mode 100644
index f39f666840626dcf6ea61a196be702ec1c3e3308..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: kin
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_kin
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
deleted file mode 100644
index c935ee47382973e3dbe833987ea083bd3023b5cd..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: lin
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_lin
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
deleted file mode 100644
index 72e4bce0113c8473eabf68a7d2e43ba2eabc965c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: lug
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_lug
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
deleted file mode 100644
index 3ff902499480d35576cb84453406a5d484349816..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: orm
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_orm
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
deleted file mode 100644
index 9979740a9bf6194d9a9c4db0f0b4845312f1aed7..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: sna
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_sna
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
deleted file mode 100644
index deb2b9b81d544140bfa7e720d0b544089b39bfcd..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: sot
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_sot
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
deleted file mode 100644
index e58d90bc69357a3b9c166e8f29000894daa8b108..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: swa
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_swa
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
deleted file mode 100644
index 51a2d26ae0563acda4972b272de4c0d6de81146f..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: twi
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_twi
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
deleted file mode 100644
index 006b684782c853a432d9e694abe525aaeb9609ca..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: wol
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_wol
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
deleted file mode 100644
index c0bdf4471b2178c67d7f6e1ae9c5fba16b3b7710..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: xho
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_xho
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
deleted file mode 100644
index 0e7ba6005b591141dc84efa454196458c1261e8c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: yor
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_yor
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
deleted file mode 100644
index a18d251cc8f838fa2578019475b089c4b61ecf65..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-dataset_name: zul
-include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_zul
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_tt.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_tt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbbf9387e5563914e7b89a06540d73156c8fb1b9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_tt.yaml
@@ -0,0 +1,9 @@
+group: afrimmlu_tt-irokobench
+task:
+  - afrimmlu_tt_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate
new file mode 100644
index 0000000000000000000000000000000000000000..7a974279a3918de90369c391b09de818cb1b483d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate
@@ -0,0 +1,32 @@
+tag: afrimmlu_tt_tasks
+dataset_path: masakhane/afrimmlu-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aaaaa6b8b653a2093459dfc1d7649c932b1e1957
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_translate
+task: afrimmlu_translate_amh_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45298a1f4ed4c0f101f816b7adc46115da2b3aba
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_translate
+task: afrimmlu_translate_ewe_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ac43a8060bf6e3dff94cd141c2bcb02014e8abd
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_translate
+task: afrimmlu_translate_fra_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c09424d3812e26e2c8ff8a2bc08eecec7690f9fd
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_translate
+task: afrimmlu_translate_hau_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6fe139910f24c12612a64c9633fd2dd625c580cc
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_translate
+task: afrimmlu_translate_ibo_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c689952307fc07588af39a3c9db587aac74c4389
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_translate
+task: afrimmlu_translate_kin_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e245d7bdd3b96e16b84da7db505d0b005cb165fc
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_translate
+task: afrimmlu_translate_lin_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bcbac5f65455134c60b873710d7c2e66cf0ac7ca
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_translate
+task: afrimmlu_translate_lug_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84b3d2c35042fa836f7450fd48e4654876c0473e
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_translate
+task: afrimmlu_translate_orm_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..722ee9526102ab230cab4472842fda9e902f9119
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e8893aa9d996552eb8a830dbaca3c8ad8988ee3
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb89697c67df29132bfb5db2c4d381f24f133e92
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d672f6c768b823615c67ee46c9b48ae87d8177a9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9fb5f3709d4bfb2702903eb6b50f36bd9b02c34a
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a06af041863759d796db54f8276c7722d4abf92
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f5eb7de392b44c98f4d415dff01eb0dda357247
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae04b652fe227cecb10f25f8dd998a9ccb50dba3
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/afrimmlu_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_1
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_1/utils.py b/lm_eval/tasks/afrimmlu/translate/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1bb9162f0fbc68807db68134970ae2636980cbf
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_1/utils.py
@@ -0,0 +1,47 @@
+import ast
+
+# Kept importable from this module: the task config resolves
+# `!function utils.weighted_f1_score` against it.
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer options of ``doc`` as a Python object.
+
+    ``doc["choices"]`` is expected to hold the source text of a Python list
+    literal -- TODO confirm against the dataset schema.
+    """
+    # literal_eval accepts literals only, so a malformed or hostile dataset row
+    # raises instead of executing arbitrary code as eval() would.
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Render the multiple-choice prompt for one document.
+
+    Fills the template with ``doc["subject"]``, ``doc["question"]`` and the
+    first four parsed entries of ``doc["choices"]``.
+    """
+    # The leading whitespace inside the template is part of the prompt text.
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
+                model answers multiple-choice questions about {subject}
+
+                Question: {question}
+
+                Choices:
+                        A: {choice1}
+                        B: {choice2}
+                        C: {choice3}
+                        D: {choice4}
+
+                Answer:  """
+
+    choices = doc_to_choice(doc)
+    return output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate
new file mode 100644
index 0000000000000000000000000000000000000000..7a974279a3918de90369c391b09de818cb1b483d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate
@@ -0,0 +1,32 @@
+tag: afrimmlu_tt_tasks
+dataset_path: masakhane/afrimmlu-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..283b6ff1798eba1b331edc46036b5cf38e482443
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_translate
+task: afrimmlu_translate_amh_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39e9f7355ace85592c87ad626303f3d4cc89a16b
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_translate
+task: afrimmlu_translate_ewe_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ced80282a8addde381a1b719e10e871adb2cf533
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_translate
+task: afrimmlu_translate_fra_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d687cac47605f507f67c4a30d39e975a2a794cb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_translate
+task: afrimmlu_translate_hau_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d1edfaa3999a264b903295abfc34d39cdcece572
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_translate
+task: afrimmlu_translate_ibo_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48fa15181a1c6e031581cb7e80da907788b90750
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_translate
+task: afrimmlu_translate_kin_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54163d5ccfad9c3973265750ca52b522cf685c7d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_translate
+task: afrimmlu_translate_lin_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6b709ec0b5d967579097b2251f16e278db940e5
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_translate
+task: afrimmlu_translate_lug_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a58330c01ee2da2d2b76b3f700ccdad665450fa
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_translate
+task: afrimmlu_translate_orm_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0edd502ec7d909dcde2026f7e2bc0d7747032728
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f499691fb172c8ddc1a89abc1d3ac64c4303286
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f267b6d087b15b710a44e8015f2d77070c35853d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b145669ebb274b39da4826cf7a62c4dd0d72dd35
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c87a8d61d88ab5109759ac57cc18c1935f9596b0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f55271270b73c6c10eb29765ae8852383a2c832
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5cdbc6b28f26ffa7d227618e1adf04aa13f0d8d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ff80402b4d66972d6cbde5aec2eb63163aa35d5
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/afrimmlu_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_2
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_2/utils.py b/lm_eval/tasks/afrimmlu/translate/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0cfb334c27cfe4c5bbb1ff7126215c0ea9130c9
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_2/utils.py
@@ -0,0 +1,45 @@
+import ast
+
+# Kept importable from this module: the task config resolves
+# `!function utils.weighted_f1_score` against it.
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    """Return the answer options of ``doc`` as a Python object.
+
+    ``doc["choices"]`` is expected to hold the source text of a Python list
+    literal -- TODO confirm against the dataset schema.
+    """
+    # literal_eval accepts literals only, so a malformed or hostile dataset row
+    # raises instead of executing arbitrary code as eval() would.
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Render the multiple-choice prompt for one document.
+
+    Fills the template with ``doc["subject"]``, ``doc["question"]`` and the
+    first four parsed entries of ``doc["choices"]``.
+    """
+    # The leading whitespace inside the template is part of the prompt text.
+    output = """As an expert in {subject}, choose the most accurate answer to the question below.
+Your goal is to select the correct option 'A', 'B', 'C', or 'D' by understanding the nuances of the topic.
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+
+    choices = doc_to_choice(doc)
+    return output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate
new file mode 100644
index 0000000000000000000000000000000000000000..7a974279a3918de90369c391b09de818cb1b483d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate
@@ -0,0 +1,32 @@
+tag: afrimmlu_tt_tasks
+dataset_path: masakhane/afrimmlu-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0377257387ded41038b9d77532536cd5383a02b0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_translate
+task: afrimmlu_translate_amh_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa924284fc298a2171f6cd13e69684d1e26cc166
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_translate
+task: afrimmlu_translate_ewe_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7bef6e78de5ed0f69cda1e26383e05cc5735c23d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_translate
+task: afrimmlu_translate_fra_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d9841db93cb047f4b5a21118a1c7ce5f036ea1d4
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_translate
+task: afrimmlu_translate_hau_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20c58b27ee0e4c3d73affc18d621a8db348f278e
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_translate
+task: afrimmlu_translate_ibo_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed3cfd16498db252b6c48eda56079b253595cfa7
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_translate
+task: afrimmlu_translate_kin_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9eeb66eaadf15cbeec8cfcbf6664fa6eb74da889
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_translate
+task: afrimmlu_translate_lin_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..46f722b757ee9acb3520868d428bb51735343687
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_translate
+task: afrimmlu_translate_lug_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f95b375edb041fd68f907a0b00411beb4808e50
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_translate
+task: afrimmlu_translate_orm_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa031e5c8a875c5fe75f11efa01dc044a003b1a4
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e55ce671f15df0c1478357f4f8dad2fa4a4f09ed
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f507772fb75616c8fe9a44ccd4a9549125046b8
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1ea25d8b7f3ff1a3ce70f0a4ed68d2e999ff2143
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a077d853ed9ff74956449be43bd5f5fc36fac6b
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18678da825da9e065ed6825d3d7955e30e9c7fd0
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..815d219ba75659cf564b691a6c859042b6a397b6
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f294a873804351a29f63276958d10a963e461519
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/afrimmlu_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_3
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_3/utils.py b/lm_eval/tasks/afrimmlu/translate/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4609d97afdbe27cb5b55da9f057a08a2f73d649
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_3/utils.py
@@ -0,0 +1,32 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    import ast  # parse the list literal in doc["choices"]; never eval() dataset text
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Return the prompt-3 text for ``doc`` (keys read: subject, question, choices)."""
+    import ast  # function-scope import: the file's import block is outside this block
+    output = """You are a subject matter expert in {subject}.
+
+  Utilizing your expertise in {subject}, answer the following multiple-choice question
+  by picking ''A'', ''B'', ''C'', or ''D''.
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+    choices = ast.literal_eval(doc["choices"])  # parse the literal; never execute it
+    return output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate
new file mode 100644
index 0000000000000000000000000000000000000000..7a974279a3918de90369c391b09de818cb1b483d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate
@@ -0,0 +1,32 @@
+tag: afrimmlu_tt_tasks
+dataset_path: masakhane/afrimmlu-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90c880241df63265e7c8e7a60228163024394e9c
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_translate
+task: afrimmlu_translate_amh_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c63ccfb49c04b4bfbbbf54c2d6a85d3308a64708
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_translate
+task: afrimmlu_translate_ewe_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..740f87adbe66021a281f516836558a767ab91c68
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_translate
+task: afrimmlu_translate_fra_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f73a2edd89efa810435fa4363fe69a7b0855425
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_translate
+task: afrimmlu_translate_hau_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..424fbab9ad78b940e8c8f252d774ab474a606878
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_translate
+task: afrimmlu_translate_ibo_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cafcae600b0f4f74160353609f68c6fecf055068
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_translate
+task: afrimmlu_translate_kin_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..618f4aaf77e7694b2265bb1084630c7927460daa
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_translate
+task: afrimmlu_translate_lin_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe59cfd265493fb8d54c46793ac69a7d6f5e7279
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_translate
+task: afrimmlu_translate_lug_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f25d96c45abc75c963effa1a0dcba504fdb81ce
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_translate
+task: afrimmlu_translate_orm_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0943eec102650255d5955fde8b995eea2274c83a
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..882117e4c7c10e6b0eabd80531d136189891188f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92928aae81f1bc2f27e99b6e5ad20401990a102f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d093262712cefd637aa1861887ff92ddd077270
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66161c7dd6a149ab713ae6f92f595b097c6ba794
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25ff91f0004a504f48b1ed9c0ea3b000a7c7fcf2
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54743fddbc83195064abc456b8dca0ff750c6fe5
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ebd80f22645d4cb5dfbd584b6fa60a45edaf3f44
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/afrimmlu_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_4
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_4/utils.py b/lm_eval/tasks/afrimmlu/translate/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..29c23b7f856b2ab4ead359cafbbf404241e53ffb
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_4/utils.py
@@ -0,0 +1,28 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    import ast  # parse the list literal in doc["choices"]; never eval() dataset text
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Return the prompt-4 text for ``doc`` (keys read: question, choices)."""
+    import ast  # function-scope import: the file's import block is outside this block
+    output = """Analyze each question critically and determine the most correct option based on your understanding of the subject matter
+
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+    choices = ast.literal_eval(doc["choices"])  # parse the literal; never execute it
+    return output.format(
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate
new file mode 100644
index 0000000000000000000000000000000000000000..7a974279a3918de90369c391b09de818cb1b483d
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate
@@ -0,0 +1,32 @@
+tag: afrimmlu_tt_tasks
+dataset_path: masakhane/afrimmlu-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+doc_to_text: !function utils.doc_to_text
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
+doc_to_choice: !function utils.doc_to_choice
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_amh.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b5ebb387b40e7a12a103a72ee43b3f711de9a7f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrimmlu_translate
+task: afrimmlu_translate_amh_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4860b79c529b23f7827c6b25527e0070b32c36bc
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrimmlu_translate
+task: afrimmlu_translate_ewe_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..192f3423797a807c355fc21bb4c1e9137cc31b72
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrimmlu_translate
+task: afrimmlu_translate_fra_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1fc6aafbb4d14111f18e31957c888fcd773acdef
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrimmlu_translate
+task: afrimmlu_translate_hau_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a6cca83cd7eb6b765b5512827044547e01810813
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrimmlu_translate
+task: afrimmlu_translate_ibo_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6acd743a77e5b04b5df5ddf8861984f206697542
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrimmlu_translate
+task: afrimmlu_translate_kin_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f62c43f601a2d0e48b462fbf3cc4c1ce09760c50
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrimmlu_translate
+task: afrimmlu_translate_lin_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b8a97f30edee493743ed39a4ff2e8ede1b1ab4c
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrimmlu_translate
+task: afrimmlu_translate_lug_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0eebc1bdf290e775f6e2a7bba8601a7d980ed884
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrimmlu_translate
+task: afrimmlu_translate_orm_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60d4d57f5bb6d793467316ac4fe2c2c97d055289
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrimmlu_translate
+task: afrimmlu_translate_sna_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eeddbb7b71328018a6ecdc1317c37c288a4f1806
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrimmlu_translate
+task: afrimmlu_translate_sot_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24a3b78e2d1c5b1323a335cbe20034a7a7e4b0b7
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrimmlu_translate
+task: afrimmlu_translate_swa_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab3ea80c0610b333c11b67712e6bc5284994bbed
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrimmlu_translate
+task: afrimmlu_translate_twi_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cd495e81df06383612c1278b18faeb0ac5c567f
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrimmlu_translate
+task: afrimmlu_translate_wol_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9af97c083d1230687a6ea6a01c29a3024a79760
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrimmlu_translate
+task: afrimmlu_translate_xho_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6e227431566f0c5187e0baeeaeb1e82838db0469
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrimmlu_translate
+task: afrimmlu_translate_yor_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a08884b8f40dc91798bdbba0ddf066862e8a2d76
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/afrimmlu_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrimmlu_translate
+task: afrimmlu_translate_zul_prompt_5
diff --git a/lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py b/lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..147225bb70653d67663ac1762a7cd6246c4e9f22
--- /dev/null
+++ b/lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py
@@ -0,0 +1,28 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_choice(doc):
+    import ast  # parse the list literal in doc["choices"]; never eval() dataset text
+    return ast.literal_eval(doc["choices"])
+
+
+def doc_to_text(doc):
+    """Return the prompt-5 text for ``doc`` (keys read: subject, question, choices)."""
+    import ast  # function-scope import: the file's import block is outside this block
+    output = """Given your proficiency in {subject}, please answer the subsequent multiple-choice question with 'A', 'B', 'C', or 'D'.
+Question: {question}
+Choices:
+        A: {choice1}
+        B: {choice2}
+        C: {choice3}
+        D: {choice4}
+Answer: """
+    choices = ast.literal_eval(doc["choices"])  # parse the literal; never execute it
+    return output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
diff --git a/lm_eval/tasks/afrimmlu/translate/utils.py b/lm_eval/tasks/afrimmlu/translate/utils.py
deleted file mode 100644
index 9d02b342b2e3c9f3d3bd66d3f62330aa53c9159c..0000000000000000000000000000000000000000
--- a/lm_eval/tasks/afrimmlu/translate/utils.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from lm_eval.utils import weighted_f1_score
-
-
-def doc_to_choice(doc):
-    choices = eval(doc["choices"])
-    return choices
-
-
-def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence
-                model answers multiple-choice questions about '{subject}'
-
-                Question: '''{question}'''
-
-                Choices:
-                        A: ''{choice1}'''
-                        B: ''{choice2}'''
-                        C: ''{choice3}'''
-                        D: ''{choice4}'''
-
-                Answer:  """
-
-    choices = eval(doc["choices"])
-    text = output.format(
-        subject=doc["subject"],
-        question=doc["question"],
-        choice1=choices[0],
-        choice2=choices[1],
-        choice3=choices[2],
-        choice4=choices[3],
-    )
-    return text
diff --git a/lm_eval/tasks/afrixnli/direct/afrixnli.yaml b/lm_eval/tasks/afrixnli/direct/afrixnli.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d85ccd128f752f7a1ab566aa28e90d5bbf545b66
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/afrixnli.yaml
@@ -0,0 +1,13 @@
+group: afrixnli-irokobench
+task:
+  - afrixnli_tasks_prompt_1
+  - afrixnli_tasks_prompt_2
+  - afrixnli_tasks_prompt_3
+  - afrixnli_tasks_prompt_4
+  - afrixnli_tasks_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_amh.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39f727b4ccb7c07eb0b2f6b8d2472764446767d4
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_amh.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_eng.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..593c57a34ec0f01d3c03e447acda48cd1644231b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_eng.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ewe.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b6a10baae753575ebb228a6df34e4faf364efea1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ewe.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_fra.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08b2b5243633f276487d8d5595382211870eedf9
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_fra.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_hau.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe234b72694fdfde8474863d81265e851a350368
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_hau.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ibo.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d282e0e5f84433b77f8954acaa4e65c9ccbf5ba4
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_ibo.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_kin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cfdff6c8c64e6a91a869386f71b6f6024c0ac156
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_kin.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..410cb29f80366d78d0ec4fb9e240a9df0ec20372
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lin.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lug.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5665e37cce68d072cf8b64be5a787aed23fd70b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_lug.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_orm.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12751c7f93de40a8bc91431de573be9f99868ab4
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_orm.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sna.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d00bbb6f9d146effdf5de3112db4c56f79002166
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sna.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sot.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ae346aed8ff8e53b94252385443b54fe4364595
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_sot.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_swa.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca6729bf80cad6e95027c7c0e994cd1da14d0d6d
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_twi.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7dc85428dab2ed5da0cb6fa17b0a428088f346f1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_twi.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_wol.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78ef254aeed1d393efca6390fa574aa41eac5f21
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_wol.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_xho.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb0a8527741f4b8f3ac1fb7c2741a6cf5e2c64ae
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_xho.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81c9eeaa5af0740cc32122519f671c4d0425c080
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yaml
@@ -0,0 +1,30 @@
+tag:
+    - afrixnli_tasks
+    - afrixnli_tasks_prompt_1
+dataset_path: masakhane/afrixnli
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yor.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..473aea37a7b036d7ef219eca756482cd2bac754b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_yor.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_zul.yaml b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa07a8c991d56a0cef3dc8453017649952715f8a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/afrixnli_zul.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_1
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_1/utils.py b/lm_eval/tasks/afrixnli/direct/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_1/utils.py
@@ -0,0 +1,19 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):
+    """Build the prompt_1 NLI prompt from ``doc["premise"]`` and ``doc["hypothesis"]``.
+
+    Adjacent literals (not a triple-quoted block) keep source indentation out of the prompt.
+    """
+    return (
+        "Please identify whether the premise entails or contradicts the hypothesis "
+        "in the following premise and hypothesis. The answer should be exact "
+        "entailment, contradiction, or neutral.\n\nPremise: {premise}\n"
+        "Hypothesis: {hypothesis}\n\nIs it entailment, contradiction, or neutral?"
+    ).format(premise=doc["premise"], hypothesis=doc["hypothesis"])
+
+
+def doc_to_target(doc):
+    """Return the NLI class name for ``doc["label"]`` (0, 1 or 2); other labels raise KeyError."""
+    return {0: "entailment", 1: "neutral", 2: "contradiction"}[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_amh.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fbf916b25d43db2fdc476c27fe5f2e8e02c45625
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_eng.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dfa8ebfe8815a58c1a043b328a22f762a739b9d2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_eng.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: eng
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ewe.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..995ef3e65894548266a72f45417222e2760e30fa
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_fra.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce72588c19901c04ee82479206f54816fa358915
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_hau.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..369ee58bedfd98fd95c63510c3a84eec10238df0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ibo.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e118c613ebf2d0388298fe6ba750923816ba4af6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_kin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81f6d803d6762f5a6b86dae00ec0b26040a943ac
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d99c2eb57aedcce988f37415c414882d5bb4186
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lug.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..31325539e1dd778da5e057436aeb3b60d7531a58
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_orm.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4ad555afafe2b99470d706e0eb46dc8256037fb
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sna.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a780b0c428c822c08d5bb16dd909cb883da494a2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sot.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..94e78880d31b48ce8dc4e562a9d9cc3643208535
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_swa.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8622e2833c24290007405dc90043f0c5b6ced7ad
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_twi.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4219b81ee8a1de24535cf2cd6eae4643e660d0de
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_wol.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..546b17904959bd83c1f26617a9a13b79fc654a55
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_xho.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..649c61df93eef6b6828d7da9c5a672bc5fce9611
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cfab642bf9175d3066879680618e95f097a609a2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yaml
@@ -0,0 +1,34 @@
+tag:
+    - afrixnli_tasks
+    - afrixnli_tasks_prompt_2
+dataset_path: masakhane/afrixnli
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
+# True = entailment
+# False = contradiction
+# Neither = neutral
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "True"
+  - "Neither"
+  - "False"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yor.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53f23ace6bbb25c6457f7bd4e5b760b7ebb8b298
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_zul.yaml b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd89fe131e26f4b0d1dedb1b86aeac72f8f706d6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/afrixnli_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_2
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_2/utils.py b/lm_eval/tasks/afrixnli/direct/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d1ac19e19b2e855c957e75f1c778366dfbc7e55
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_2/utils.py
@@ -0,0 +1,6 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Return the answer word for ``doc["label"]`` (0, 1 or 2); other labels raise KeyError."""
+    return {0: "True", 1: "Neither", 2: "False"}[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_amh.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3ff9f99c187a914fde7514c7c4caf49cb63c4186
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_amh.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Given the following premise and hypothesis in Amharic, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_eng.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a53aea6dbd9c3fedd9a812fc8f698b5f16d41bf3
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_eng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Given the following premise and hypothesis in English, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ewe.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54b58ae6e972774be55be9988a20d6962a7e56ff
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ewe.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Given the following premise and hypothesis in Ewe, identify if the premise\
+  \ entails, contradicts, or is neutral towards the hypothesis. Please respond with\
+  \ exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \n\
+  Hypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_fra.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fedb519ec8421d77aedb24215de643544614bf70
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_fra.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Given the following premise and hypothesis in French, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_hau.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a9ebb95181426ab8a9138267a8400c37825e76e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Given the following premise and hypothesis in Hausa, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ibo.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b61f7678a3ab596bbda1b6039129e2e71b6bda6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Given the following premise and hypothesis in Igbo, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_kin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1bd0829bfaf89354c5814eeffe1d4de8432fa540
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_kin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Given the following premise and hypothesis in Kinyarwanda, identify\
+  \ if the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..697c439fa18a1ecd80e41fca14bc0836d956cfde
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Given the following premise and hypothesis in Lingala, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lug.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b5667c0720473f0ab7703b17777b5ced0459381
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_lug.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Given the following premise and hypothesis in Luganda, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_orm.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..37a6d843e51870f6ad2845e1f385b6aacef2fab2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_orm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Given the following premise and hypothesis in Oromo, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sna.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7e0f0b05000c40fbebca19921a2486a57d8054a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sna.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Given the following premise and hypothesis in chiShona, identify if\
+  \ the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sot.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c0ccd9e64ee0b807fef742c873126666327f276
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_sot.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Given the following premise and hypothesis in Sesotho, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_swa.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dabd96ef2e1cbf6df83432cc057382d40e448ff7
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_swa.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Given the following premise and hypothesis in Swahili, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_twi.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4d3158d45209fe96a7e0b7520d065c9108a0798b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_twi.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Given the following premise and hypothesis in Twi, identify if the premise\
+  \ entails, contradicts, or is neutral towards the hypothesis. Please respond with\
+  \ exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \n\
+  Hypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_wol.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51fbdc79b0987386839599a3521bb2d564256e83
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Given the following premise and hypothesis in Wolof, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_xho.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00ca9d17934256c4169ba0da95f7a604c60ac037
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_xho.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Given the following premise and hypothesis in isiXhosa, identify if\
+  \ the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04609ac3c424b323858858fdbffbd83ccec52b7e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yaml
@@ -0,0 +1,30 @@
+tag:
+    - afrixnli_tasks
+    - afrixnli_tasks_prompt_3
+dataset_path: masakhane/afrixnli
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yor.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d8b2f847f473ad2d1857e924495e98fffbc9edd
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_yor.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Given the following premise and hypothesis in Yoruba, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_zul.yaml b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83b87141b4021baad581be7d6b60375c40ef73c5
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/afrixnli_zul.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Given the following premise and hypothesis in Zulu, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_3
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_3/utils.py b/lm_eval/tasks/afrixnli/direct/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..422ed169bffa2777cd91307c3ab097619e4d5399
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_3/utils.py
@@ -0,0 +1,6 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Return the NLI class name for the integer ``label`` field of *doc*."""
+    return {0: "entailment", 1: "neutral", 2: "contradiction"}[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_amh.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63b05465144af310263939fea2b8335672dbb7ae
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_amh.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Amharic language.\nAnalyze the premise and hypothesis given in Amharic, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_eng.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1ecb06d10497274dde56ab73302525add553254a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_eng.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the English language.\nAnalyze the premise and hypothesis given in English, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ewe.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64157b549956f932b3ad5bd2f67610378d683596
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ewe.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Ewe language.\nAnalyze the premise and hypothesis given in Ewe, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_fra.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78da10cf7e04482eef5c0f9517a176196681c103
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_fra.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the French language.\nAnalyze the premise and hypothesis given in French, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_hau.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..811a0fca1364f55e5ba3dfe37e7d9c99e7090e6a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Hausa language.\nAnalyze the premise and hypothesis given in Hausa, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ibo.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..73fdba2fba55cc9af3bb802e50562de8ceb9a97e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Igbo language.\nAnalyze the premise and hypothesis given in Igbo, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_kin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f975d82b4a3b2ee0898aa8cc9aec225b3bb26e2d
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_kin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Kinyarwanda language.\nAnalyze the premise and hypothesis given in Kinyarwanda,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63062ac444bcd71951152416384ae5852510decb
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Lingala language.\nAnalyze the premise and hypothesis given in Lingala, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lug.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1553c620009ec7374378e584e5f7523ff6d57306
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_lug.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Luganda language.\nAnalyze the premise and hypothesis given in Luganda, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_orm.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba2a377b7fb03f6cd1546fe8f1b65549d2133d6c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_orm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Oromo language.\nAnalyze the premise and hypothesis given in Oromo, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sna.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afce6e955b21eb514b4dfd024d7a8d115a3377ba
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sna.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the chiShona language.\nAnalyze the premise and hypothesis given in chiShona,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sot.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40c7cf8476813d29347fdd8e14785cb61a48c172
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_sot.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Sesotho language.\nAnalyze the premise and hypothesis given in Sesotho, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_swa.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c28aaae79accde1d796fb5dff56f8998130df0b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_swa.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Swahili language.\nAnalyze the premise and hypothesis given in Swahili, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_twi.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f9835314e7fc979010c80e6be80ebd616eb3abff
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_twi.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Twi language.\nAnalyze the premise and hypothesis given in Twi, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_wol.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b535bc2d45555821810abb91755fb2afbae9bd1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Wolof language.\nAnalyze the premise and hypothesis given in Wolof, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_xho.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45f55e0e1441fe40f0a6fcecec0309d9c3013dc3
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_xho.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the isiXhosa language.\nAnalyze the premise and hypothesis given in isiXhosa,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe5de1a6dd271c23b14712b09ab070ec848b753b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yaml
@@ -0,0 +1,30 @@
+tag:
+    - afrixnli_tasks
+    - afrixnli_tasks_prompt_4
+dataset_path: masakhane/afrixnli
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yor.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63d4f60642c71a1d881af27291a73e04b4abca34
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_yor.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Yoruba language.\nAnalyze the premise and hypothesis given in Yoruba, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_zul.yaml b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b4a232e395e36f6180a81247b23b624efcdbd05
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/afrixnli_zul.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Zulu language.\nAnalyze the premise and hypothesis given in Zulu, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_4
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_4/utils.py b/lm_eval/tasks/afrixnli/direct/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_4/utils.py
@@ -0,0 +1,19 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):
+    """Fill the NLI prompt template with the ``premise`` and ``hypothesis`` fields of *doc*."""
+    template = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
+    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
+
+    Premise: {premise}
+    Hypothesis: {hypothesis}
+
+    Is it entailment, contradiction, or neutral?"""
+    # Continuation lines of the template keep their four-space source indentation in the output.
+    return template.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
+
+
+def doc_to_target(doc):
+    """Return the NLI class name for the integer ``label`` field of *doc*."""
+    return {0: "entailment", 1: "neutral", 2: "contradiction"}[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_amh.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70873a211527bec45adf7c689deef653eb3cfe07
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_amh.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_amh_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_eng.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..675264a8dc0da305ec2d92ff16a8393fd9bd0729
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_eng_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ewe.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f60db0bffdd4ed7c367a10c6e365707a02348a4
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ewe.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ewe_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_fra.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2bb558dabcb62d4e7c49c54c100986703fdc88ad
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_fra.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_fra_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_hau.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..728ae1b805f2ac9f014200ad59c82b6e822ca884
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_hau_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ibo.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3086b9b4f3c31121e089a8f0bc4c9e9ee4c1cc4a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_ibo_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_kin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13a8845cf1ca22f4a25c79931d526a3305a2172c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_kin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_kin_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lin.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0250f29f300f3f0d312744b0c7a83bfbcc1bc55
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lin_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lug.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..928b74ce4fce73f952ac71999b4dbfc83c9632cb
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_lug.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_lug_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_orm.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f7f555db795996ac482deaae924db8af58e5c123
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_orm_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sna.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac0ef3007edbcdcaf6a705202565ec1d842889a0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sna.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sna_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sot.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21fcdde5b66733a9f488c12b207545241b7ee7e6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_sot.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_sot_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_swa.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d5824adcf3373864aa3ecf660952f667c648ea8
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_swa.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_swa_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_twi.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b519ef71eec7ddf91fc2e247021779edfec29145
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_twi_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_wol.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a865c8b166b19e458c3ff68138f74e48f8ce6b60
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_wol_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_xho.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1891bfd0592a8d3c6e2f0e98619bbeee234a852f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_xho.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_xho_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13e2b6ef7244d2689d7c56146aa15328e792c2fc
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yaml
@@ -0,0 +1,30 @@
+tag:
+    - afrixnli_tasks
+    - afrixnli_tasks_prompt_5
+dataset_path: masakhane/afrixnli
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "true"
+  - "inconclusive"
+  - "false"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yor.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4510441b606a8dbc0e635a00c3a009c4f891bd23
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_yor_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_zul.yaml b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2aa872b0410f56ea9e7ea19c4fb3d5adf93d323d
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/afrixnli_zul.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_yaml
+task: afrixnli_zul_prompt_5
diff --git a/lm_eval/tasks/afrixnli/direct/prompt_5/utils.py b/lm_eval/tasks/afrixnli/direct/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b9cb312b25a4c21bdd3d6a5e0a4e8e160451e4a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/direct/prompt_5/utils.py
@@ -0,0 +1,14 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Return the gold answer string for ``doc``.
+
+    :param doc: Example with an integer ``label`` field (0, 1 or 2).
+    :return: The answer string for that label.
+    :raises KeyError: If ``label`` is missing or not 0, 1 or 2.
+    """
+    # Same label order as the sibling prompt variants, where 1 is the
+    # "neutral"/"Neither" class and 2 the "contradiction"/"False" class.
+    replacements = {0: "true", 1: "inconclusive", 2: "false"}
+    return replacements[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/gen_utils.py b/lm_eval/tasks/afrixnli/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..338b4f9daf9e0724b0d87a7f2182fac956e245ca
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/gen_utils.py
@@ -0,0 +1,163 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:
+    """Plain holder for a single ``value``.
+
+    NOTE(review): nothing else in this module references this class --
+    presumably meant for emitting ``!function`` tags; confirm before use.
+    """
+
+    def __init__(self, value):
+        self.value = value
+
+
+def prompt_func(mode, lang):
+    """
+    Return the ``doc_to_text`` template for one prompt variant.
+
+    :param mode: Prompt key: ``prompt_1``, ``prompt_3``, ``prompt_4`` or ``prompt_5``.
+    :param lang: Language name interpolated into the ``prompt_3``/``prompt_4`` text.
+    :return: Prompt text with ``{{premise}}`` / ``{{hypothesis}}`` placeholders.
+    :raises KeyError: If ``mode`` has no entry here (there is none for ``prompt_2``).
+    """
+    # The placeholders sit in plain (non-f) string fragments, so the doubled
+    # braces are emitted literally, in the same ``{{field}}`` form the
+    # hand-written prompt_2 template uses.
+    prompt_map = {
+        "prompt_1": "Please identify whether the premise entails or contradicts the hypothesis in the following premise "
+        "and hypothesis. The answer should be exact entailment, contradiction, or neutral.\n\nPremise: {{premise}}\nHypothesis: {{hypothesis}}\n\n"
+        "Is it entailment, contradiction, or neutral?",
+        "prompt_3": f"Given the following premise and hypothesis in {lang}, identify if the premise entails, contradicts, "
+        f"or is neutral towards the hypothesis. Please respond with exact 'entailment', 'contradiction', or 'neutral'. \n\n"
+        "Premise: {{premise}} \nHypothesis: {{hypothesis}}",
+        "prompt_4": f"You are an expert in Natural Language Inference (NLI) specializing in the {lang} language.\n"
+        f"Analyze the premise and hypothesis given in {lang}, and determine the relationship between them.\n "
+        f"Respond with one of the following options: 'entailment', 'contradiction', or 'neutral'. \n\n"
+        "Premise: {{premise}} \nHypothesis: {{hypothesis}}",
+        "prompt_5": "Based on the given statement, is the following claim 'true', 'false', or 'inconclusive'. \n"
+        "Statement: {{premise}} \nClaim: {{hypothesis}}",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to. If its last path
+        component is ``translate``, the translate file names, task names and
+        template are used.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant (``prompt_<n>``); files are written to the
+        sub-directory of ``output_dir`` with that name.
+    :raises FileExistsError: If ``overwrite`` is false and any target file
+        already exists (raised once, after every language was attempted).
+    """
+    err = []
+    languages = {
+        "eng": "English",
+        "amh": "Amharic",
+        "ibo": "Igbo",
+        "fra": "French",
+        "sna": "chiShona",
+        "wol": "Wolof",
+        "ewe": "Ewe",
+        "lin": "Lingala",
+        "lug": "Luganda",
+        "xho": "isiXhosa",
+        "kin": "Kinyarwanda",
+        "twi": "Twi",
+        "zul": "Zulu",
+        "orm": "Oromo",
+        "yor": "Yoruba",
+        "hau": "Hausa",
+        "sot": "Sesotho",
+        "swa": "Swahili",
+    }
+
+    # normpath drops a trailing separator, so "translate/" is recognised too.
+    is_translate = os.path.basename(os.path.normpath(output_dir)) == "translate"
+    # prompt_2 takes its doc_to_text from the shared template; every other
+    # variant gets a per-language prompt written into its yaml.
+    prompt_number = int(mode.split("_")[-1])
+    needs_prompt = prompt_number == 1 or prompt_number > 2
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        if is_translate:
+            file_name = f"afrixnli_translate_{lang}.yaml"
+            task_name = f"afrixnli_translate_{lang}_{mode}"
+            yaml_template = "afrixnli_translate_yaml"
+        else:
+            file_name = f"afrixnli_{lang}.yaml"
+            task_name = f"afrixnli_{lang}_{mode}"
+            yaml_template = "afrixnli_yaml"
+
+        yaml_details = {
+            "include": yaml_template,
+            "task": task_name,
+            "dataset_name": lang,
+        }
+        if needs_prompt:
+            yaml_details["doc_to_text"] = prompt_func(mode, lang_name)
+
+        try:
+            with open(
+                os.path.join(target_dir, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                # Header text kept as-is so output matches the checked-in yamls.
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        dest="overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist (default)",
+    )
+    # --overwrite defaults to True, so without an opt-out the flag was a no-op
+    # and the "already exist" error could never be reached from the CLI.
+    parser.add_argument(
+        "--no-overwrite",
+        dest="overwrite",
+        action="store_false",
+        help="Fail instead of overwriting files that already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./translate",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_5",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrixnli/translate/afrixnli_tt.yaml b/lm_eval/tasks/afrixnli/translate/afrixnli_tt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba507b39d0320bc6307062fc3158a2f1d9212c84
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/afrixnli_tt.yaml
@@ -0,0 +1,9 @@
+group: afrixnli_tt-irokobench
+task:
+  - afrixnli_tt_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92ef8df7270120b2ead5d3ece0d9cffc2bfc1741
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_amh.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_amh_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ewe.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa32dd72a6bd7412eae5fb94ccb3d5af06402a1c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ewe.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ewe_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_fra.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3dc72af6f3dbcc41ab83a2adcf384b952e97a4d5
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_fra.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_fra_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_hau.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77f22faf2789d56de00b4a226832e2cb3d401362
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_hau.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_hau_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ibo.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a7ac8793976d7722375f01f43f587b74e9654ec2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_ibo.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ibo_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_kin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a52861402a8c3d65ccbe025fa62807d86e89b14
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_kin.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_kin_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb2a667e864afb856ec85ecd6b300378b44f8050
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lin.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lin_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lug.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf45d957a3d4f169630b3c3405b348a0ceefe1b1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_lug.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lug_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_orm.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14b20a1c35ae63adddddbc4a0b8d4e1fba2c90b7
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_orm.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_orm_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sna.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13103dd7a2f339f02b280dd3c67d8ec27807c86a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sna.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sna_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sot.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97cf3cba569f25219a7cdccb2be7a1c6effae0c9
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_sot.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sot_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_swa.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..824bb17aa23f3a1a546d369a6d3118272ee05c54
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_swa_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_twi.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d971c3e5fe6ca65e7568da81b647d2ff8f20696
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_twi.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_twi_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_wol.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..877787a88c0e8ee0c2d7d87d97aeff2d91e1330f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_wol.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_wol_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_xho.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c907a2bf453d99c048822ad93feb12781004d2d0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_xho.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_xho_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..832b51493a0f17996ebca680f7151e38b59168d3
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yaml
@@ -0,0 +1,27 @@
+tag: afrixnli_tt_tasks
+dataset_path: masakhane/afrixnli-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yor.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c0ec7c91ff7d81a500bb1d335fe06406a3c94e0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_yor.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_yor_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_zul.yaml b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78753d1fe3ef143c547780041e70cc03d20289f1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/afrixnli_translate_zul.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Please identify whether the premise entails or contradicts the hypothesis
+  in the following premise and hypothesis. The answer should be exact entailment,
+  contradiction, or neutral.
+
+
+  Premise: {{premise}}
+
+  Hypothesis: {{hypothesis}}
+
+
+  Is it entailment, contradiction, or neutral?'
+include: afrixnli_translate_yaml
+task: afrixnli_translate_zul_prompt_1
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_1/utils.py b/lm_eval/tasks/afrixnli/translate/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_1/utils.py
@@ -0,0 +1,29 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):
+    """Render the prompt_1 NLI question for ``doc``.
+
+    :param doc: Mapping with ``premise`` and ``hypothesis`` entries.
+    :return: The prompt text with both fields substituted.
+    """
+    # NOTE(review): the continuation lines of this literal are indented in the
+    # source, so that indentation is part of the returned prompt -- confirm
+    # this is intended.
+    template = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
+    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
+
+    Premise: {premise}
+    Hypothesis: {hypothesis}
+
+    Is it entailment, contradiction, or neutral?"""
+    return template.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
+
+
+def doc_to_target(doc):
+    """Map the integer ``label`` of ``doc`` to its class name.
+
+    :raises KeyError: If the label is not 0, 1 or 2.
+    """
+    label_names = {0: "entailment", 1: "neutral", 2: "contradiction"}
+    return label_names[doc["label"]]
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0810f6b37b9c83815fab4de50d3bc42b2c01624e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrixnli_translate_yaml
+task: afrixnli_translate_amh_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ewe.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7aec16a61ab04415327addd832893ab6e4e531c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ewe
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ewe_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_fra.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f17a4ecf0a3fe044c04fff53ef61f5946bf744b1
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_fra.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fra
+include: afrixnli_translate_yaml
+task: afrixnli_translate_fra_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_hau.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..688778c3195c54868f0f3d1d9f56e17c167205f9
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrixnli_translate_yaml
+task: afrixnli_translate_hau_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ibo.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5667b3d0624c569bf9e392a67780a80fdf5aee3b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ibo_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_kin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a74950cc34121f63d3dd944a310b7656bc2ff894
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrixnli_translate_yaml
+task: afrixnli_translate_kin_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..27e88a5b7664a515e7d6cc901962f201913670d6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lin
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lin_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lug.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63ff988c3aea7178e06d85ca20fea40c87e9dbcc
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: lug
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lug_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_orm.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db1a3ea1cbeff8ddd8c5b2ba49d3d553410c1b06
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrixnli_translate_yaml
+task: afrixnli_translate_orm_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sna.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa110774a6fa86e29253307163e22c82f35a9ac0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sna
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sna_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sot.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3133308b2810a3a61e1037ebe58c02ba0da0a2ef
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_sot.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: sot
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sot_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_swa.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..926f91321cfcade46ca52492cba83daa348bc746
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrixnli_translate_yaml
+task: afrixnli_translate_swa_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_twi.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c229de3dd2b40b902e7ef93de0cc06a6da04d8af
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrixnli_translate_yaml
+task: afrixnli_translate_twi_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_wol.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..87844c49639fa350e52877d967f64d5ea95cf28e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: wol
+include: afrixnli_translate_yaml
+task: afrixnli_translate_wol_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_xho.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63fa3ffc1c69d89637dca73da5050c840593aa4a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: xho
+include: afrixnli_translate_yaml
+task: afrixnli_translate_xho_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ad87afcd99a5c875ccf1f7d2761afdea2a118aa
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yaml
@@ -0,0 +1,31 @@
+tag: afrixnli_tt_tasks
+dataset_path: masakhane/afrixnli-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
+# True = entailment
+# False = contradiction
+# Neither = neutral
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "True"
+  - "Neither"
+  - "False"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yor.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7dfc9bd6d46fb8982402dd85531d1d312a8a07b8
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrixnli_translate_yaml
+task: afrixnli_translate_yor_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_zul.yaml b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0878c4e03e985eced95c234fa079c3d92999a982
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/afrixnli_translate_zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: zul
+include: afrixnli_translate_yaml
+task: afrixnli_translate_zul_prompt_2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_2/utils.py b/lm_eval/tasks/afrixnli/translate/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d1ac19e19b2e855c957e75f1c778366dfbc7e55
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_2/utils.py
@@ -0,0 +1,6 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):  # Return the answer string for the integer class id in doc["label"].
+    replacements = {0: "True", 1: "Neither", 2: "False"}  # class id -> answer string
+    return replacements[doc["label"]]  # raises KeyError for any id other than 0, 1 or 2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6fb06d0f1f7dc3c62e7c51a6395b1a79f1759878
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_amh.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Given the following premise and hypothesis in Amharic, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_amh_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ewe.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d550f9daff83651b4099d3ad4228de0afab6ac4
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ewe.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Given the following premise and hypothesis in Ewe, identify if the premise\
+  \ entails, contradicts, or is neutral towards the hypothesis. Please respond with\
+  \ exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \n\
+  Hypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ewe_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_fra.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3156466c388362d06414b613e963f0f9fcb1465f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_fra.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Given the following premise and hypothesis in French, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_fra_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_hau.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ae3c21e2100b4b07428f67c23070d51d32761b0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Given the following premise and hypothesis in Hausa, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_hau_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ibo.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28696f337841ae738354ae11bf537f07743c49f5
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Given the following premise and hypothesis in Igbo, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ibo_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_kin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6981da83291b927f5fd5908f62e039227ded3a9e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_kin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Given the following premise and hypothesis in Kinyarwanda, identify\
+  \ if the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_kin_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1984416f0a828871d5e29ca46bde91c300440feb
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Given the following premise and hypothesis in Lingala, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lin_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lug.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32a7ad2a161818c6d84b9002d6690b95cc86af3c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_lug.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Given the following premise and hypothesis in Luganda, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lug_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_orm.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3923a80cdc78c59dabb63bb3fe9dda6e8e572a9
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_orm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Given the following premise and hypothesis in Oromo, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_orm_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sna.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7dbf17e8b0520fc15bc6d6b337c959f828268ef
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sna.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Given the following premise and hypothesis in chiShona, identify if\
+  \ the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sna_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sot.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4e89ec90608180f281765d321c6c1b0220171e0
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_sot.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Given the following premise and hypothesis in Sesotho, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sot_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_swa.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3c5243b35264d5b26065a63c9c3babb8c714ec7
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_swa.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Given the following premise and hypothesis in Swahili, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_swa_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_twi.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7e8568701be5958b2da080d5d6c0885e83bb370
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_twi.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Given the following premise and hypothesis in Twi, identify if the premise\
+  \ entails, contradicts, or is neutral towards the hypothesis. Please respond with\
+  \ exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \n\
+  Hypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_twi_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_wol.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cf0b08eff851ad0832adad093f6338222f3bc74
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Given the following premise and hypothesis in Wolof, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_wol_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_xho.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4dafa34f6cd5b69e115de88e04cd24b3473c5fd
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_xho.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Given the following premise and hypothesis in isiXhosa, identify if\
+  \ the premise entails, contradicts, or is neutral towards the hypothesis. Please\
+  \ respond with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_xho_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..832b51493a0f17996ebca680f7151e38b59168d3
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yaml
@@ -0,0 +1,27 @@
+tag: afrixnli_tt_tasks
+dataset_path: masakhane/afrixnli-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yor.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5c01ca54eaba74fd968ce7847f43b2fe4b373fd
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_yor.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Given the following premise and hypothesis in Yoruba, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_yor_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_zul.yaml b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdbbec809a0b21cabc58defd6cbac15d0ea29ff2
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/afrixnli_translate_zul.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Given the following premise and hypothesis in Zulu, identify if the\
+  \ premise entails, contradicts, or is neutral towards the hypothesis. Please respond\
+  \ with exact 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_zul_prompt_3
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_3/utils.py b/lm_eval/tasks/afrixnli/translate/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c455a3045a9be8b7318b96e23d9f061add6a342e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_3/utils.py
@@ -0,0 +1,21 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):  # Build an English NLI prompt from doc["premise"] and doc["hypothesis"].
+    output = """You are an NLP assistant whose purpose is to solve Natural Language Inference (NLI) problems
+
+    Please identify whether the premise entails or contradicts the hypothesis in the following premise
+    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
+
+    Premise: {premise}
+    Hypothesis: {hypothesis}
+
+    Is it entailment, contradiction, or neutral?"""
+
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])  # NOTE(review): template lines after the first keep their 4-space indent in the result
+    return text  # NOTE(review): the prompt_3 configs visible in this patch set doc_to_text inline; confirm this helper is still referenced
+
+
+def doc_to_target(doc):  # Return the answer string for the integer class id in doc["label"].
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}  # same strings as doc_to_choice in afrixnli_translate_yaml
+    return replacements[doc["label"]]  # raises KeyError for any id other than 0, 1 or 2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5f972e7070ac1ed50b7ed177daa706d520e3a4f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_amh.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Amharic language.\nAnalyze the premise and hypothesis given in Amharic, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_amh_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ewe.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ebc775dd705e572f68835e194b6c3c8d745f6e1b
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ewe.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Ewe language.\nAnalyze the premise and hypothesis given in Ewe, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ewe_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_fra.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ad718c708ddb9c82c3e0ac35c7b805c0c364a64
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_fra.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the French language.\nAnalyze the premise and hypothesis given in French, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_fra_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_hau.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd65f366bc2de430404434c7918e3a0bb69aba03
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Hausa language.\nAnalyze the premise and hypothesis given in Hausa, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_hau_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ibo.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13df12642e743ceb4867a4d515ed2d20edce7486
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Igbo language.\nAnalyze the premise and hypothesis given in Igbo, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ibo_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_kin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..198d88750287ee0fc28507ddcdab0146b1c2f734
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_kin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Kinyarwanda language.\nAnalyze the premise and hypothesis given in Kinyarwanda,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_kin_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b25856cfdcbb7b0c0df2aba81322d650585e9d9a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Lingala language.\nAnalyze the premise and hypothesis given in Lingala, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lin_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lug.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..633c173c0b75903c81934391e1e9f07a8de9b7f7
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_lug.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Luganda language.\nAnalyze the premise and hypothesis given in Luganda, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lug_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_orm.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e63f93eb89351af076593eff1623123301bbda9c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_orm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Oromo language.\nAnalyze the premise and hypothesis given in Oromo, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_orm_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sna.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6fcb4e063159ab48e9e73423b895658a74cafa9e
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sna.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the chiShona language.\nAnalyze the premise and hypothesis given in chiShona,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sna_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sot.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..358e4b353eb73065bf91b5471127faf5b2f1675f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_sot.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Sesotho language.\nAnalyze the premise and hypothesis given in Sesotho, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sot_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_swa.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ce271ed77c52dbce949a6a83ccd1313d26c9b25
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_swa.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Swahili language.\nAnalyze the premise and hypothesis given in Swahili, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_swa_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_twi.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8171e0daa98aa93614800f3c77a42d2b699ee2a7
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_twi.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Twi language.\nAnalyze the premise and hypothesis given in Twi, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_twi_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_wol.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2662662dff2263023dd2bce1afeb86bb09ce262
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Wolof language.\nAnalyze the premise and hypothesis given in Wolof, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_wol_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_xho.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5aa3a9d171a4dabe831d5a6126ca61384d218d81
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_xho.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the isiXhosa language.\nAnalyze the premise and hypothesis given in isiXhosa,\
+  \ and determine the relationship between them.\n Respond with one of the following\
+  \ options: 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}}\
+  \ \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_xho_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..832b51493a0f17996ebca680f7151e38b59168d3
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yaml
@@ -0,0 +1,27 @@
+tag: afrixnli_tt_tasks
+dataset_path: masakhane/afrixnli-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "entailment"
+  - "neutral"
+  - "contradiction"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yor.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..478e5043431c73fe2448474b593ae02002d6a722
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_yor.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Yoruba language.\nAnalyze the premise and hypothesis given in Yoruba, and\
+  \ determine the relationship between them.\n Respond with one of the following options:\
+  \ 'entailment', 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis:\
+  \ {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_yor_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_zul.yaml b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0dc06e6bce0aae18769cd2259aee6699be3b0bd
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/afrixnli_translate_zul.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "You are an expert in Natural Language Inference (NLI) specializing in\
+  \ the Zulu language.\nAnalyze the premise and hypothesis given in Zulu, and determine\
+  \ the relationship between them.\n Respond with one of the following options: 'entailment',\
+  \ 'contradiction', or 'neutral'. \n\nPremise: {{premise}} \nHypothesis: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_zul_prompt_4
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_4/utils.py b/lm_eval/tasks/afrixnli/translate/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_4/utils.py
@@ -0,0 +1,19 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):  # Build an English NLI prompt from doc["premise"] and doc["hypothesis"].
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
+    and hypothesis. The answer should be exact entailment, contradiction, or neutral.
+
+    Premise: {premise}
+    Hypothesis: {hypothesis}
+
+    Is it entailment, contradiction, or neutral?"""
+
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])  # NOTE(review): template lines after the first keep their 4-space indent in the result
+    return text  # NOTE(review): the prompt_4 configs visible in this patch set doc_to_text inline; confirm this helper is still referenced
+
+
+def doc_to_target(doc):  # Return the answer string for the integer class id in doc["label"].
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}  # same strings as doc_to_choice in afrixnli_translate_yaml
+    return replacements[doc["label"]]  # raises KeyError for any id other than 0, 1 or 2
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3079712ce60b0b9b7e5846d3e1d9b16383c8cf97
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_amh.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_amh_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ewe.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6eb452db53361c5c048e75a807de20b3528414ec
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ewe.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ewe_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_fra.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6ddf49332ba4ec1671a7a20e036e7d4906c2097
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_fra.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_fra_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_hau.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09d182f7f12e9debba53ed9bd5b1249c38b63a53
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_hau_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ibo.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5bf1555a454879018ee16c3ed60dd1f71cbdbbe
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_ibo_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_kin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0cbe9c2c78612111af3ce29e4a8b846879f3060
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_kin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_kin_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lin.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..159116be57762adddff7368d1731a867a8daa152
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lin_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lug.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9448fa28cb2561631dfa78fbeccb4bc054c867f6
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_lug.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_lug_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_orm.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64621cb4b784c2e4c16bfe220acb69d7ea17cb8f
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_orm_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sna.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..788bae3068b2b40ed1ae4419a8ddbfc9094fe71c
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sna.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sna_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sot.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..617dd9f88db6d283eef224126b36a0fcf2e35158
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_sot.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_sot_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_swa.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81a159252ca45302bf5c88c448489a76bd342270
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_swa.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_swa_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_twi.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb9f115fb3c01ea8a509a7347f6a369ae1f9c819
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_twi_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_wol.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5f4eb0c2eeaf10d04b2978cf3ffb821a7123889
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_wol_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_xho.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d085919b777d18fff97397bdd32d1c0cf2c1c316
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_xho.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_xho_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3047238439e371f99664aed214a6589b33528e66
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yaml
@@ -0,0 +1,27 @@
+tag: afrixnli_tt_tasks
+dataset_path: masakhane/afrixnli-translate-test
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+doc_to_target: !function utils.doc_to_target
+doc_to_choice:
+  - "true"
+  - "inconclusive"
+  - "false"
+should_decontaminate: true
+doc_to_decontamination_query: premise
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    average: weighted
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yor.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..107c663428d39e3eaa565315a40e4aa5f4b53201
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_yor_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_zul.yaml b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d963646034069b77a78fe5284b106b6f74718a6a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/afrixnli_translate_zul.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Based on the given statement, is the following claim 'true', 'false',\
+  \ or 'inconclusive'. \nStatement: {{premise}} \nClaim: {{hypothesis}}"
+include: afrixnli_translate_yaml
+task: afrixnli_translate_zul_prompt_5
diff --git a/lm_eval/tasks/afrixnli/translate/prompt_5/utils.py b/lm_eval/tasks/afrixnli/translate/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b9cb312b25a4c21bdd3d6a5e0a4e8e160451e4a
--- /dev/null
+++ b/lm_eval/tasks/afrixnli/translate/prompt_5/utils.py
@@ -0,0 +1,6 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Map `label` (0=entailment, 1=neutral, 2=contradiction) to the matching answer choice."""
+    return {0: "true", 1: "inconclusive", 2: "false"}[doc["label"]]
diff --git a/lm_eval/tasks/afrobench/README.md b/lm_eval/tasks/afrobench/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a6ab3ceef1b37e94f1c191d1931648b7b669a49e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/README.md
@@ -0,0 +1,72 @@
+# AfroBench
+
+### Paper
+
+Title: `AfroBench: How Good are Large Language Models on African Languages?`
+
+Paper Link: https://arxiv.org/abs/2311.07978
+
+## Abstract
+> Large-scale multilingual evaluations, such as MEGA, often include only a handful of African languages due to the scarcity of high-quality evaluation data and the limited discoverability of existing African datasets. This lack of representation hinders comprehensive LLM evaluation across a diverse range of languages and tasks. To address these challenges, we introduce AfroBench -- a multi-task benchmark for evaluating the performance of LLMs across 64 African languages, 15 tasks and 22 datasets. AfroBench consists of nine natural language understanding datasets, six text generation datasets, six knowledge and question answering tasks, and one mathematical reasoning task. We present results comparing the performance of prompting LLMs to fine-tuned baselines based on BERT and T5-style models. Our results suggest large gaps in performance between high-resource languages, such as English, and African languages across most tasks; but performance also varies based on the availability of monolingual data resources. Our findings confirm that performance on African languages continues to remain a hurdle for current LLMs, underscoring the need for additional efforts to close this gap.
+
+HomePage: https://mcgill-nlp.github.io/AfroBench/
+
+### Groups, and Tasks
+#### Groups
+* `afrobench`: Runs all the tasks, datasets and prompts in this benchmark
+* `afrobench_lite`: Runs the lite version of the benchmark, which includes: afrimgsm, afrimmlu, afrixnli, sib, intent, adr and flores
+
+Dataset-specific groups, each listing all of its prompts so that users can review or edit them:
+* `adr`   `afrihate`   `afrisenti`   `belebele`  `african_flores` `injongointent`  `mafand`  `masakhaner`  `masakhapos`  `naijarc`  `nollysenti`  `african_ntrex`  `openai_mmlu`  `salt`  `sib`  `uhura`  `xlsum`
+
+
+#### Task Tags
+* `adr_tasks`: all datasets in this benchmark relating to Automatic Diacritics Restoration task
+* `afrihate_tasks`: all datasets in this benchmark relating to Hate Speech detection task
+* `afrimgsm_tasks`: all datasets in this benchmark relating to Mathematical reasoning task
+* `afrixnli_tasks`: all datasets in this benchmark relating to Natural Language Inference task
+* `afrobench_xqa_tasks`: all datasets in this benchmark relating to Crosslingual QA (XQA) task
+* `afrobench_sentiment_tasks`: all datasets in this benchmark relating to Sentiment Classification task
+* `afrobench_MT_tasks`: all datasets in this benchmark relating to Machine Translation task
+* `afrobench_TC_tasks`: all datasets in this benchmark relating to Topic Classification task
+* `afrobench_mmlu_tasks`: all datasets in this benchmark relating to MMLU task
+* `injongointent_tasks`: all datasets in this benchmark relating to Intent Detection task
+* `masakhaner_tasks`: all datasets in this benchmark relating to Named Entity Recognition (NER) task
+* `masakhapos_tasks`: all datasets in this benchmark relating to Part of Speech Tagging (POS) task
+* `RC_tasks`: all datasets in this benchmark relating to Reading Comprehension task
+* `uhura_arc_easy_tasks`: all datasets in this benchmark relating to Arc-Easy (XQA) task
+* `xlsum_tasks`: all datasets in this benchmark relating to Summarization task
+
+
+We've included sample run scripts for easier integration with the benchmark: [sample run scripts](./sample_run_scripts)
+
+For better understanding of the run interface see [interface.md](../../../docs/interface.md)
+
+All datasets used in this benchmark are available at [huggingface](https://huggingface.co/collections/masakhane/afrobench-67dbf553ebf5701c2207f883)
+
+### Citation
+
+```
+@misc{ojo2025afrobenchgoodlargelanguage,
+      title={AfroBench: How Good are Large Language Models on African Languages?},
+      author={Jessica Ojo and Odunayo Ogundepo and Akintunde Oladipo and Kelechi Ogueji and Jimmy Lin and Pontus Stenetorp and David Ifeoluwa Adelani},
+      year={2025},
+      eprint={2311.07978},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2311.07978},
+}
+```
+Please cite datasets used. Citations for individual datasets are included in their respective repository readme files within this benchmark.
+### Checklist
+
+For adding novel benchmarks/datasets to the library:
+* [ ] Is the task an existing benchmark in the literature?
+  * [ ] Have you referenced the original paper that introduced the task?
+  * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? The original paper doesn't have an associated implementation, but there is an official entry in [BigBench](https://github.com/google/BIG-bench/tree/main/bigbench/benchmark_tasks/social_iqa). I use the same prompting format as BigBench.
+
+
+If other tasks on this dataset are already supported:
+* [ ] Is the "Main" variant of this task clearly denoted?
+* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
diff --git a/lm_eval/tasks/afrobench/adr/README.md b/lm_eval/tasks/afrobench/adr/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cb09567dcd2f461e3adba531bddd570c21ebfdf5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/README.md
@@ -0,0 +1,7 @@
+# Automatic Diacritics Restoration (ADR)
+
+Automatic Diacritics Restoration (ADR) is the task of restoring diacritical marks in text where they have been omitted or removed.
+This process is essential for languages where diacritics alter pronunciation, meaning, or grammatical structure.
+ADR requires the model to have a deep understanding of linguistic context, syntax, and semantics to accurately predict and reinsert the appropriate diacritics.
+
+As part of this benchmark project, we utilise the mafand dataset to curate a dataset specifically for ADR. We focus on five languages: Ghomálá', Fon, Igbo, Wolof, and Yoruba.
diff --git a/lm_eval/tasks/afrobench/adr/afridiacritics.yaml b/lm_eval/tasks/afrobench/adr/afridiacritics.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34d60eef66acd9829ebb6e60ca6b85e8616a32d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/afridiacritics.yaml
@@ -0,0 +1,13 @@
+group: adr
+task:
+  - adr_prompt_1
+  - adr_prompt_2
+  - adr_prompt_3
+  - adr_prompt_4
+  - adr_prompt_5
+aggregate_metric_list:
+  - metric: chrf  # the adr subtasks declare only bleu and chrf; `acc` is never produced
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/adr/gen_utils.py b/lm_eval/tasks/afrobench/adr/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff6e63e3456abf809a1068f4abeea8ac93b49e94
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/gen_utils.py
@@ -0,0 +1,105 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # bare wrapper: keeps the object it is given as `.value`
+    def __init__(self, value):
+        self.value = value  # NOTE(review): nothing else in this script instantiates the class
+
+
+def prompt_func(mode, lang):
+    """Return the diacritics-restoration prompt stored under `mode`, worded for `lang`."""
+    tail = "{{text}}. Return output sentence only"  # plain str, so the braces stay doubled
+    expert = f"You are a linguist specializing in diacritical marks for {lang}. "
+    templates = {
+        "prompt_1": "Please restore the missing diacritics in the following sentence: " + tail,
+        "prompt_2": "Given a sentence without diacritics, add the appropriate diacritics to make it "
+        "grammatically and semantically correct. \nSentence: " + tail,
+        "prompt_3": f"This text is in {lang}. Restore all diacritical marks to their proper places "
+        "in the following sentence: " + tail,
+        "prompt_4": expert + f"Add the appropriate diacritics to this {lang} sentence: " + tail,
+        "prompt_5": expert + f"Diacritics are essential for proper pronunciation and meaning in {lang}. "
+        f"You are tasked with converting {lang} sentences  without diacritics into their correctly "
+        "accented forms. Here's the input: {{text}}. Return output sentence only",
+    }
+    return templates[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language under `<output_dir>/<mode>/`.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key passed to `prompt_func`; also the sub-directory and task-name suffix.
+    :raises FileExistsError: After all languages were tried, if any file existed and `overwrite` is false.
+    """
+    languages = {
+        "fon": "Fon",
+        "bbj": "Gbomala",
+        "ibo": "Igbo",
+        "wol": "Wolof",
+        "yor": "Yoruba",
+    }
+    # Build every config first: an unknown `mode` fails in prompt_func before anything is written.
+    configs = {
+        f"afridiacritics_{code}.yaml": {
+            "include": "afridiacritics_yaml",
+            "task": f"afridiacritics_{code}_{mode}",
+            "dataset_name": code,
+            "doc_to_text": prompt_func(mode, name),
+        }
+        for code, name in languages.items()
+    }
+
+    target_dir = os.path.join(output_dir, mode)
+    # Kept outside the try below so a directory error is not reported as "use --overwrite".
+    os.makedirs(target_dir, exist_ok=True)
+
+    existing = []
+    for file_name, yaml_details in configs.items():
+        path = os.path.join(target_dir, file_name)
+        try:
+            # Mode "x" raises FileExistsError rather than clobbering an existing file.
+            with open(path, "w" if overwrite else "x", encoding="utf8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            existing.append(file_name)
+
+    if existing:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(existing)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # was True, which made this store_true flag a no-op (always overwrote)
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_bbj.yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3eb26ebae6f723c03591aa73eb29f2256fb0e4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_bbj.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: 'Please restore the missing diacritics in the following sentence: {{text}}.
+  Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_bbj_prompt_1
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_fon.yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..874832d5d00799deb9235d2f04960684f2b91770
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_fon.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'Please restore the missing diacritics in the following sentence: {{text}}.
+  Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_fon_prompt_1
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_ibo.yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..983bc3914c421f46fa1adbdd67c15c433996a584
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Please restore the missing diacritics in the following sentence: {{text}}.
+  Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_wol.yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9067770a3dac5fc4aab23cdad2d15ede76b82de4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'Please restore the missing diacritics in the following sentence: {{text}}.
+  Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53cebaee05c9e7a65779ad12faaa0a9ee40c7c8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yaml
@@ -0,0 +1,25 @@
+tag:
+- adr_tasks
+- adr_prompt_1
+dataset_path: masakhane/diacritics-restoration
+dataset_kwargs: {trust_remote_code: True}
+doc_to_target: target
+output_type: generate_until
+fewshot_split: dev
+test_split: test
+training_split: train
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  do_sample: false
+  until:
+  - '<eos>'
+  - </s>
+  - <|im_end|>
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yor.yaml b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e98af10abec2009d32b112694923f45c17473af
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_1/afridiacritics_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Please restore the missing diacritics in the following sentence: {{text}}.
+  Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_bbj.yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f054eea4c29da978b830d3a5eb2571af364f920
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_bbj.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Given a sentence without diacritics, add the appropriate diacritics\
+  \ to make it grammatically and semantically correct. \nSentence: {{text}}. Return\
+  \ output sentence only"
+include: afridiacritics_yaml
+task: afridiacritics_bbj_prompt_2
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_fon.yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07f7114649ff8f362a0a2072995724290c5224bd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_fon.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "Given a sentence without diacritics, add the appropriate diacritics\
+  \ to make it grammatically and semantically correct. \nSentence: {{text}}. Return\
+  \ output sentence only"
+include: afridiacritics_yaml
+task: afridiacritics_fon_prompt_2
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_ibo.yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1ebac101f9a69722b21bbfe65ccd224f811e8d3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Given a sentence without diacritics, add the appropriate diacritics\
+  \ to make it grammatically and semantically correct. \nSentence: {{text}}. Return\
+  \ output sentence only"
+include: afridiacritics_yaml
+task: afridiacritics_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_wol.yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8448d6ffce9f67252621cf6085fc575dace588e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Given a sentence without diacritics, add the appropriate diacritics\
+  \ to make it grammatically and semantically correct. \nSentence: {{text}}. Return\
+  \ output sentence only"
+include: afridiacritics_yaml
+task: afridiacritics_wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0cc722d890f6a64939417f39f860532c4cd342b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yaml
@@ -0,0 +1,25 @@
+tag:
+- adr_tasks
+- adr_prompt_2
+dataset_path: masakhane/diacritics-restoration
+dataset_kwargs: {trust_remote_code: True}
+doc_to_target: target
+output_type: generate_until
+fewshot_split: dev
+test_split: test
+training_split: train
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  do_sample: false
+  until:
+  - '<eos>'
+  - </s>
+  - <|im_end|>
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yor.yaml b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb95f5e234add5c178efc181bddb1fc87f9ce19d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_2/afridiacritics_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Given a sentence without diacritics, add the appropriate diacritics\
+  \ to make it grammatically and semantically correct. \nSentence: {{text}}. Return\
+  \ output sentence only"
+include: afridiacritics_yaml
+task: afridiacritics_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_bbj.yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a50b40c535d778cb4bd564455fbfdcf43415a53d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_bbj.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: 'This text is in Gbomala. Restore all diacritical marks to their proper
+  places in the following sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_bbj_prompt_3
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_fon.yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5b0909ce9dd69f46cdca75ebdc325f452b25a462
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_fon.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'This text is in Fon. Restore all diacritical marks to their proper places
+  in the following sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_fon_prompt_3
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_ibo.yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04d1df0e1f07ac7c082bd75b1ce93959e0e0d56d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'This text is in Igbo. Restore all diacritical marks to their proper
+  places in the following sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_wol.yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..576e0845188b523be7ec2f342a440174aa496263
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_wol.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'This text is in Wolof. Restore all diacritical marks to their proper
+  places in the following sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a27eeef2d37880527c7b99f1fa9296f843b72a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yaml
@@ -0,0 +1,25 @@
+tag:
+- adr_tasks
+- adr_prompt_3
+dataset_path: masakhane/diacritics-restoration
+dataset_kwargs: {trust_remote_code: True}
+doc_to_target: target
+output_type: generate_until
+fewshot_split: dev
+test_split: test
+training_split: train
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  do_sample: false
+  until:
+  - '<eos>'
+  - </s>
+  - <|im_end|>
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yor.yaml b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..169c110872d6fdc2d2d41b6472fe30d93934f5df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_3/afridiacritics_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'This text is in Yoruba. Restore all diacritical marks to their proper
+  places in the following sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_bbj.yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7a807b09ee3000f022374e31e61cdb2f2e091f0e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_bbj.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: 'You are a linguist specializing in diacritical marks for Ghomala. Add
+  the appropriate diacritics to this Ghomala sentence: {{text}}. Return output sentence
+  only'
+include: afridiacritics_yaml
+task: afridiacritics_bbj_prompt_4
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_fon.yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..11076e685ae5f6a4435a486d3f35db269ded8f51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_fon.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'You are a linguist specializing in diacritical marks for Fon. Add the
+  appropriate diacritics to this Fon sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_fon_prompt_4
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_ibo.yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..367e387ae7456f57d604b1fe3ac032084b16fb98
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'You are a linguist specializing in diacritical marks for Igbo. Add the
+  appropriate diacritics to this Igbo sentence: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_wol.yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23fb81e754445e8745d5d67720707af7d502e3df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'You are a linguist specializing in diacritical marks for Wolof. Add
+  the appropriate diacritics to this Wolof sentence: {{text}}. Return output sentence
+  only'
+include: afridiacritics_yaml
+task: afridiacritics_wol_prompt_4
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ae62e9d3384d3ee1bff044dbfd1cb23275ae517
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yaml
@@ -0,0 +1,25 @@
+tag:
+- adr_tasks
+- adr_prompt_4
+dataset_path: masakhane/diacritics-restoration
+dataset_kwargs: {trust_remote_code: True}
+doc_to_target: target
+output_type: generate_until
+fewshot_split: dev
+test_split: test
+training_split: train
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  do_sample: false
+  until:
+  - '<eos>'
+  - </s>
+  - <|im_end|>
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yor.yaml b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21e3a53fefcb4ae41eb00a406a3319f14ed60aba
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_4/afridiacritics_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'You are a linguist specializing in diacritical marks for Yoruba. Add
+  the appropriate diacritics to this Yoruba sentence: {{text}}. Return output sentence
+  only'
+include: afridiacritics_yaml
+task: afridiacritics_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_bbj.yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1bcc833c73d0a789700c1b50b8636163620ed27
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_bbj.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: 'You are a linguist specializing in diacritical marks for Ghomala. Diacritics
+  are essential for proper pronunciation and meaning in Ghomala. You are tasked with
+  converting Ghomala sentences without diacritics into their correctly accented forms.
+  Here''s the input: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_bbj_prompt_5
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_fon.yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a1c55f813b4c4b7d08daff74cf32040b85e2b35
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_fon.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'You are a linguist specializing in diacritical marks for Fon. Diacritics
+  are essential for proper pronunciation and meaning in Fon. You are tasked with converting
+  Fon sentences without diacritics into their correctly accented forms. Here''s the
+  input: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_fon_prompt_5
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_ibo.yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cc9865dca7a2d8889df88d73abfb54b615089f4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'You are a linguist specializing in diacritical marks for Igbo. Diacritics
+  are essential for proper pronunciation and meaning in Igbo. You are tasked with
+  converting Igbo sentences without diacritics into their correctly accented forms.
+  Here''s the input: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_wol.yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fed10a7031ac71a948720b15eff1677df411934c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'You are a linguist specializing in diacritical marks for Wolof. Diacritics
+  are essential for proper pronunciation and meaning in Wolof. You are tasked with
+  converting Wolof sentences without diacritics into their correctly accented forms.
+  Here''s the input: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_wol_prompt_5
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aaad3306e7270e78cdd2f83dd8ffeb790520134d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yaml
@@ -0,0 +1,25 @@
+tag:
+- adr_tasks
+- adr_prompt_5
+dataset_path: masakhane/diacritics-restoration
+dataset_kwargs: {trust_remote_code: True}
+doc_to_target: target
+output_type: generate_until
+fewshot_split: dev
+test_split: test
+training_split: train
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  do_sample: false
+  until:
+  - '<eos>'
+  - </s>
+  - <|im_end|>
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yor.yaml b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd1c9007a394de95a19c1a09b398614366537a1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/adr/prompt_5/afridiacritics_yor.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'You are a linguist specializing in diacritical marks for Yoruba. Diacritics
+  are essential for proper pronunciation and meaning in Yoruba. You are tasked with
+  converting Yoruba sentences without diacritics into their correctly accented forms.
+  Here''s the input: {{text}}. Return output sentence only'
+include: afridiacritics_yaml
+task: afridiacritics_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/README.md b/lm_eval/tasks/afrobench/afriqa/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8730d7c8d8d68b6b83dfad3d4f584534b048d111
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/README.md
@@ -0,0 +1,24 @@
+# AfriQA
+
+## Paper
+Title: `AfriQA: Cross-lingual Open-Retrieval Question Answering for African Languages`
+
+Paper Link: https://arxiv.org/abs/2305.06897
+
+## Abstract
+>AfriQA is the first cross-lingual question answering (QA) dataset with a focus on African languages. The dataset includes over 12,000 XOR QA examples across 10 African languages, making it an invaluable resource for developing more equitable QA technology. African languages have historically been underserved in the digital landscape, with far less in-language content available online. This makes it difficult for QA systems to provide accurate information to users in their native language. However, cross-lingual open-retrieval question answering (XOR QA) systems can help fill this gap by retrieving answer content from other languages. AfriQA focuses specifically on African languages where cross-lingual answer content is the only high-coverage source of information. Previous datasets have primarily focused on languages where cross-lingual QA augments coverage from the target language, but AfriQA highlights the importance of African languages as a realistic use case for XOR QA.
+
+HomePage: https://github.com/masakhane-io/afriqa
+
+### Citation
+
+```
+@misc{ogundepo2023afriqa,
+      title={AfriQA: Cross-lingual Open-Retrieval Question Answering for African Languages},
+      author={Odunayo Ogundepo and Tajuddeen R. Gwadabe and Clara E. Rivera and Jonathan H. Clark and Sebastian Ruder and David Ifeoluwa Adelani and Bonaventure F. P. Dossou and Abdou Aziz DIOP and Claytone Sikasote and Gilles Hacheme and Happy Buzaaba and Ignatius Ezeani and Rooweither Mabuya and Salomey Osei and Chris Emezue and Albert Njoroge Kahira and Shamsuddeen H. Muhammad and Akintunde Oladipo and Abraham Toluwase Owodunni and Atnafu Lambebo Tonja and Iyanuoluwa Shode and Akari Asai and Tunde Oluwaseyi Ajayi and Clemencia Siro and Steven Arthur and Mofetoluwa Adeyemi and Orevaoghene Ahia and Aremu Anuoluwapo and Oyinkansola Awosan and Chiamaka Chukwuneke and Bernard Opoku and Awokoya Ayodele and Verrah Otiende and Christine Mwase and Boyd Sinkala and Andre Niyongabo Rubungo and Daniel A. Ajisafe and Emeka Felix Onwuegbuzia and Habib Mbow and Emile Niyomutabazi and Eunice Mukonde and Falalu Ibrahim Lawan and Ibrahim Said Ahmad and Jesujoba O. Alabi and Martin Namukombo and Mbonu Chinedu and Mofya Phiri and Neo Putini and Ndumiso Mngoma and Priscilla A. Amuok and Ruqayya Nasir Iro and Sonia Adhiambo},
+      year={2023},
+      eprint={2305.06897},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+```
diff --git a/lm_eval/tasks/afrobench/afriqa/afriqa.yaml b/lm_eval/tasks/afrobench/afriqa/afriqa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..80810ca4c1195f281b6eaa9581bf420ff4582291
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/afriqa.yaml
@@ -0,0 +1,13 @@
+group: afriqa
+task:
+  - afriqa_prompt_1
+  - afriqa_prompt_2
+  - afriqa_prompt_3
+  - afriqa_prompt_4
+  - afriqa_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa
new file mode 100644
index 0000000000000000000000000000000000000000..d9b6218e766a57309804e9514cf9d9682cf49131
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa
@@ -0,0 +1,42 @@
+tag:
+    - afrobench_xqa_tasks
+    - afriqa_prompt_1
+dataset_kwargs: {trust_remote_code: True}
+dataset_path: masakhane/afriqa-gold-passages
+dataset_name: null
+output_type: generate_until
+test_split: test
+fewshot_split: train
+doc_to_target: answer_pivot
+should_decontaminate: true
+doc_to_decontamination_query: question_lang
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+target_delimiter: " "
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    # Patterns are regexes removed before comparison, so the period is escaped
+    # (a bare "." would match, and erase, every character).
+    regexes_to_ignore: ["\\.", ",", "\\$"]
+  - metric: f1
+    aggregation: !function utils.f1
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    # Same literal characters as for exact_match; they are written as regexes,
+    # hence the escaped period.
+    regexes_to_ignore: ["\\.", ",", "\\$"]
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_bem.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3b639a833ee4e8fb32d992538b747ab92b1f360
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_bem.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: bem
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_bem_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_fon.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c51196157f95e96315d0321e4b53859ef8e5ae35
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_fon.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_fon_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_hau.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0536590ac3486f815ac35d5333b8a6a268fd851a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_hau.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_ibo.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62eb71160c6cf546b274b8724cd8955c0b0e86c8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_ibo.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_kin.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e632c4beef739a3e299280071229521db78cbf21
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_kin.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_swa.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dbdebe14dccc3eda73ee706e3327a13a43a3aa81
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+task: afriqa_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_twi.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67ba171569e34842e2c5af86875d2495f4715421
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_twi.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_yor.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51d20e43e0b4dcb6d5da0e61c22ad827085b253f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_yor.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_zul.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c254b96e565e01750c6a838b427f926ed3f40d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/afriqa_zul.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Your task is to answer a question given a context. Make sure you respond
+  with the shortest span containing the answer in the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_1/utils.py b/lm_eval/tasks/afrobench/afriqa/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eae1d885037da14892b39715c49e4d3aac61f06f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_1/utils.py
@@ -0,0 +1,53 @@
+import re
+import string
+from collections import Counter
+
+
+def normalize_answer(s):
+    """
+    Canonicalise an answer string like the official SQuAD v1.1 evaluation script.
+
+    Applied in order: lowercase the text, drop every character listed in
+    ``string.punctuation``, replace the standalone words "a", "an" and "the"
+    with a space, then collapse whitespace runs to single spaces and trim.
+    """
+
+    # Lowercase first: the article pattern below only matches lowercase words.
+    lowered = s.lower()
+
+    # Strip punctuation character by character.
+    punctuation = set(string.punctuation)
+    stripped = "".join(ch for ch in lowered if ch not in punctuation)
+
+    # Blank out the articles, then squeeze the whitespace that is left behind.
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", stripped)
+
+    return " ".join(without_articles.split())
+
+
+def f1(items):
+    """
+    Mean token-level F1 over ``items``, a sequence of (gold, prediction) string pairs.
+
+    Follows the official SQuAD v1.1 evaluation script: both strings are passed
+    through ``normalize_answer`` and split on whitespace, and the overlap is the
+    multiset intersection of the two token lists. Returns 0.0 when ``items`` is
+    empty.
+    """
+    if not items:
+        return 0.0
+
+    f1_list = []
+    for gold, pred in items:
+        prediction_tokens = normalize_answer(pred).split()
+        references_tokens = normalize_answer(gold).split()
+        # Counter & Counter keeps each shared token min(count_pred, count_gold) times.
+        num_same = sum((Counter(prediction_tokens) & Counter(references_tokens)).values())
+        if num_same == 0:
+            # No overlap (including an empty side): score 0 and skip the divisions.
+            f1_list.append(0)
+            continue
+        precision = num_same / len(prediction_tokens)
+        recall = num_same / len(references_tokens)
+        f1_list.append((2 * precision * recall) / (precision + recall))
+    return sum(f1_list) / len(f1_list)
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa
new file mode 100644
index 0000000000000000000000000000000000000000..d53ce05b168b8ffaf1325167aaee6537b9b2dbbe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa
@@ -0,0 +1,42 @@
+tag:
+    - afrobench_xqa_tasks
+    - afriqa_prompt_2
+dataset_kwargs: {trust_remote_code: True}
+dataset_path: masakhane/afriqa-gold-passages
+dataset_name: null
+output_type: generate_until
+test_split: test
+fewshot_split: train
+doc_to_target: answer_pivot
+should_decontaminate: true
+doc_to_decontamination_query: question_lang
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+target_delimiter: " "
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    # Patterns are regexes removed before comparison, so the period is escaped
+    # (a bare "." would match, and erase, every character).
+    regexes_to_ignore: ["\\.", ",", "\\$"]
+  - metric: f1
+    aggregation: !function utils.f1
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    # Same literal characters as for exact_match; they are written as regexes,
+    # hence the escaped period.
+    regexes_to_ignore: ["\\.", ",", "\\$"]
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_bem.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2469c7f434e133f0b94c41940917b17368566dfe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_bem.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bem
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Bemba, while the context is in English or French. Make sure you respond with the
+  shortest span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_bem_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_fon.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..384db44987a074c88615164e9da85b71fad2da37
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_fon.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Fon, while the context is in English or French. Make sure you respond with the shortest
+  span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_fon_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_hau.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40c942eced4451a620541f0cce6fe87d8f82e5cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Hausa, while the context is in English or French. Make sure you respond with the
+  shortest span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_ibo.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8198795d2d1d79855d45d1c00800e56c8ad5742b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Igbo, while the context is in English or French. Make sure you respond with the shortest
+  span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_kin.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7a238ae5cc6f687aeb84d88808f994b66464d8bd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Kinyarwanda, while the context is in English or French. Make sure you respond with
+  the shortest span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_swa.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4be94d07d9ed58745b6e87d38bfe927d20116137
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_swa.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Swahili, while the context is in English or French. Make sure you respond with the
+  shortest span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+task: afriqa_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_twi.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f08487d0c539b519b2707cc671f7ceda3b50387d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Twi, while the context is in English or French. Make sure you respond with the shortest
+  span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_yor.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44aee11a143976b845c8cb3a9c6a1d8cf01ccf17
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Yoruba, while the context is in English or French. Make sure you respond with the
+  shortest span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_zul.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99c5b18fa243c47f843862950f07e1211745f8b5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/afriqa_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Your task is to answer a question given a context. The question is in
+  Zulu, while the context is in English or French. Make sure you respond with the shortest
+  span in the context that contains the answer.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_2/utils.py b/lm_eval/tasks/afrobench/afriqa/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eae1d885037da14892b39715c49e4d3aac61f06f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_2/utils.py
@@ -0,0 +1,53 @@
+import re
+import string
+from collections import Counter
+
+
+def normalize_answer(s):
+    """
+    Canonicalize an answer string before token comparison (SQuAD v1.1 recipe).
+
+    Steps, in order: lowercase, drop every ``string.punctuation`` character,
+    blank out the standalone words "a", "an" and "the", then collapse all
+    whitespace runs to single spaces and trim the ends.
+    """
+    lowered = s.lower()
+
+    # One translate pass deletes all ASCII punctuation characters.
+    punctuation_free = lowered.translate(str.maketrans("", "", string.punctuation))
+
+    # Articles become a space so the neighbouring words stay separated.
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", punctuation_free)
+
+    # split() with no argument discards leading/trailing/repeated whitespace.
+    tokens = without_articles.split()
+
+    return " ".join(tokens)
+
+
+def f1(items):
+    """
+    Mean token-level F1 over all items (after the official SQuAD v1.1 script).
+
+    :param items: iterable of (gold, prediction) string pairs.
+    :return: average of the per-pair F1 scores computed on normalized tokens.
+    """
+    unzipped_list = list(zip(*items))
+    golds, preds = unzipped_list[0], unzipped_list[1]
+
+    f1_list = []
+    # Walk gold/prediction pairs in lockstep rather than by index.
+    for gold, pred in zip(golds, preds):
+        prediction_tokens = normalize_answer(pred).split()
+        references_tokens = normalize_answer(gold).split()
+        # Multiset intersection: shared tokens, counted with multiplicity.
+        common = Counter(prediction_tokens) & Counter(references_tokens)
+        num_same = sum(common.values())
+        if num_same == 0:
+            f1_list.append(0)  # no overlap, which also covers empty token lists
+            continue
+        precision = num_same / len(prediction_tokens)
+        recall = num_same / len(references_tokens)
+        f1_list.append((2 * precision * recall) / (precision + recall))
+
+    return sum(f1_list) / len(f1_list)
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa
new file mode 100644
index 0000000000000000000000000000000000000000..79a923b1b30075d31407e804641b71339e9bedb0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa
@@ -0,0 +1,42 @@
+tag:
+    - afrobench_xqa_tasks
+    - afriqa_prompt_3
+dataset_kwargs: {trust_remote_code: True}
+dataset_path: masakhane/afriqa-gold-passages
+dataset_name: null
+output_type: generate_until
+test_split: test
+fewshot_split: train
+doc_to_target: answer_pivot
+should_decontaminate: true
+doc_to_decontamination_query: question_lang
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+target_delimiter: " "
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: f1
+    aggregation: !function utils.f1
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_bem.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3af92f5a4abc656a862c701757d056b836506582
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_bem.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: bem
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_bem_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_fon.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..73c12439632863ee0cc49a84459abd0dbe4ea985
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_fon.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_fon_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_hau.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff08d081971aa54fd645c9e626231e82341ecabb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_hau.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_ibo.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12f18a0bff19f69cbfaa0fce86cf9d61ecd08136
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_ibo.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_kin.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e92dec41c227b0d14e2a646893b7fe4dc55e5425
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_kin.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_swa.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30c574e5fa77ad65dc91d2670720945cdb67c032
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+task: afriqa_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_twi.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b08534d9bb98b2603a405aa7d8888a8d4230a52c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_twi.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_twi_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_yor.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3c74ce7c3a86d7df40572096bdddf75ee5321c5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_yor.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_zul.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c54b0bd7f054dc8111c4c72e8fca55d7e7f0a4e6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/afriqa_zul.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Given the context, provide the answer to the following question.Ensure
+  your response is concise and directly from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_3/utils.py b/lm_eval/tasks/afrobench/afriqa/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eae1d885037da14892b39715c49e4d3aac61f06f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_3/utils.py
@@ -0,0 +1,53 @@
+import re
+import string
+from collections import Counter
+
+
+def normalize_answer(s):
+    """
+    Canonicalize an answer string before token comparison (SQuAD v1.1 recipe).
+
+    Steps, in order: lowercase, drop every ``string.punctuation`` character,
+    blank out the standalone words "a", "an" and "the", then collapse all
+    whitespace runs to single spaces and trim the ends.
+    """
+    lowered = s.lower()
+
+    # One translate pass deletes all ASCII punctuation characters.
+    punctuation_free = lowered.translate(str.maketrans("", "", string.punctuation))
+
+    # Articles become a space so the neighbouring words stay separated.
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", punctuation_free)
+
+    # split() with no argument discards leading/trailing/repeated whitespace.
+    tokens = without_articles.split()
+
+    return " ".join(tokens)
+
+
+def f1(items):
+    """
+    Mean token-level F1 over all items (after the official SQuAD v1.1 script).
+
+    :param items: iterable of (gold, prediction) string pairs.
+    :return: average of the per-pair F1 scores computed on normalized tokens.
+    """
+    unzipped_list = list(zip(*items))
+    golds, preds = unzipped_list[0], unzipped_list[1]
+
+    f1_list = []
+    # Walk gold/prediction pairs in lockstep rather than by index.
+    for gold, pred in zip(golds, preds):
+        prediction_tokens = normalize_answer(pred).split()
+        references_tokens = normalize_answer(gold).split()
+        # Multiset intersection: shared tokens, counted with multiplicity.
+        common = Counter(prediction_tokens) & Counter(references_tokens)
+        num_same = sum(common.values())
+        if num_same == 0:
+            f1_list.append(0)  # no overlap, which also covers empty token lists
+            continue
+        precision = num_same / len(prediction_tokens)
+        recall = num_same / len(references_tokens)
+        f1_list.append((2 * precision * recall) / (precision + recall))
+
+    return sum(f1_list) / len(f1_list)
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa
new file mode 100644
index 0000000000000000000000000000000000000000..e251f1e27fab773d7fd54364ebfc870819df5d55
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa
@@ -0,0 +1,42 @@
+tag:
+    - afrobench_xqa_tasks
+    - afriqa_prompt_4
+dataset_kwargs: {trust_remote_code: True}
+dataset_path: masakhane/afriqa-gold-passages
+dataset_name: null
+output_type: generate_until
+test_split: test
+fewshot_split: train
+doc_to_target: answer_pivot
+should_decontaminate: true
+doc_to_decontamination_query: question_lang
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+target_delimiter: " "
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: f1
+    aggregation: !function utils.f1
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_bem.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db3d1c2a142ae6b6b1bd86678027df58c8715785
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_bem.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bem
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_bem_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_fon.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c65dd07265d54a8c2aad56c563073fab7b38b49
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_fon.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_fon_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_hau.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..baeaf020b05b04e025024ac7dd4d3f6bf86caaa9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_ibo.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6db1cc71614a9e316eeb3b12a3f0740d9cb6e671
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_kin.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dc8f3678207cdbc1dab076573ed7aeea02b19b92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_swa.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4fe8fbcdf2a298f9b8c58047da94c24cddc72fdc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_swa.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+task: afriqa_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_twi.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d679cd0bb6173696e7555158356b067f2eea895c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_twi_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_yor.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6011dc3313ff35fe55ed204529c5d0c1503d468a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_zul.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..26a6ccad3efe93ea094a64189b5efbe1dddb9734
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/afriqa_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'You are an AI assistant and your task is to answer the question based
+  on the provided context.Your answer should be the shortest span that contains the
+  answer within the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_zul_prompt_4
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_4/utils.py b/lm_eval/tasks/afrobench/afriqa/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eae1d885037da14892b39715c49e4d3aac61f06f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_4/utils.py
@@ -0,0 +1,53 @@
+import re
+import string
+from collections import Counter
+
+
+def normalize_answer(s):
+    """
+    Canonicalize an answer string before token comparison (SQuAD v1.1 recipe).
+
+    Steps, in order: lowercase, drop every ``string.punctuation`` character,
+    blank out the standalone words "a", "an" and "the", then collapse all
+    whitespace runs to single spaces and trim the ends.
+    """
+    lowered = s.lower()
+
+    # One translate pass deletes all ASCII punctuation characters.
+    punctuation_free = lowered.translate(str.maketrans("", "", string.punctuation))
+
+    # Articles become a space so the neighbouring words stay separated.
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", punctuation_free)
+
+    # split() with no argument discards leading/trailing/repeated whitespace.
+    tokens = without_articles.split()
+
+    return " ".join(tokens)
+
+
+def f1(items):
+    """
+    Mean token-level F1 over all items (after the official SQuAD v1.1 script).
+
+    :param items: iterable of (gold, prediction) string pairs.
+    :return: average of the per-pair F1 scores computed on normalized tokens.
+    """
+    unzipped_list = list(zip(*items))
+    golds, preds = unzipped_list[0], unzipped_list[1]
+
+    f1_list = []
+    # Walk gold/prediction pairs in lockstep rather than by index.
+    for gold, pred in zip(golds, preds):
+        prediction_tokens = normalize_answer(pred).split()
+        references_tokens = normalize_answer(gold).split()
+        # Multiset intersection: shared tokens, counted with multiplicity.
+        common = Counter(prediction_tokens) & Counter(references_tokens)
+        num_same = sum(common.values())
+        if num_same == 0:
+            f1_list.append(0)  # no overlap, which also covers empty token lists
+            continue
+        precision = num_same / len(prediction_tokens)
+        recall = num_same / len(references_tokens)
+        f1_list.append((2 * precision * recall) / (precision + recall))
+
+    return sum(f1_list) / len(f1_list)
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa
new file mode 100644
index 0000000000000000000000000000000000000000..fab00068beb951dbab88d4baa870fabfced4f820
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa
@@ -0,0 +1,42 @@
+tag:
+    - afrobench_xqa_tasks
+    - afriqa_prompt_5
+dataset_kwargs: {trust_remote_code: True}
+dataset_path: masakhane/afriqa-gold-passages
+dataset_name: null
+output_type: generate_until
+test_split: test
+fewshot_split: train
+doc_to_target: answer_pivot
+should_decontaminate: true
+doc_to_decontamination_query: question_lang
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+filter_list:
+  - name: remove_whitespace
+    filter:
+      - function: remove_whitespace
+      - function: take_first
+target_delimiter: " "
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: f1
+    aggregation: !function utils.f1
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_bem.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4288845d30d2e3ec1620dd1a226bf17d385322be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_bem.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: bem
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_bem_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_fon.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c234e944783b0c456b1d0a0599d43a1d569ad18f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_fon.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_fon_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_hau.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34823c9e47d2e8d614df530559c638973a02d956
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_hau.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_ibo.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6582d2d56632e96a3d14c646a47dd7d4b55b1652
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_ibo.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_kin.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed9d6517878d8ba4e29342c915648fdb48fdd45e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_kin.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_swa.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dfcfb147f8d7789d89e0521129ba1b01f1725384
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+task: afriqa_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_twi.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cde555cf760b159378dbc137a4759ab3c87a6b4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_twi.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_twi_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_yor.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c9fa17e82c271118309bf6769efff0635cf94230
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_yor.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_zul.yaml b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..427e7217f4b113e28ad3efc568b35ab093eed463
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/afriqa_zul.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Using the context, find the answer to the question.Respond with the
+  briefest span that includes the answer from the context.
+
+  Question: {{question_lang}}
+
+  Context: {{context}}
+
+  Answer:'
+include: afriqa
+task: afriqa_zul_prompt_5
diff --git a/lm_eval/tasks/afrobench/afriqa/prompt_5/utils.py b/lm_eval/tasks/afrobench/afriqa/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eae1d885037da14892b39715c49e4d3aac61f06f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/prompt_5/utils.py
@@ -0,0 +1,53 @@
+import re
+import string
+from collections import Counter
+
+
+def normalize_answer(s):
+    """
+    Canonicalize an answer string before token comparison (SQuAD v1.1 recipe).
+
+    Steps, in order: lowercase, drop every ``string.punctuation`` character,
+    blank out the standalone words "a", "an" and "the", then collapse all
+    whitespace runs to single spaces and trim the ends.
+    """
+    lowered = s.lower()
+
+    # One translate pass deletes all ASCII punctuation characters.
+    punctuation_free = lowered.translate(str.maketrans("", "", string.punctuation))
+
+    # Articles become a space so the neighbouring words stay separated.
+    without_articles = re.sub(r"\b(a|an|the)\b", " ", punctuation_free)
+
+    # split() with no argument discards leading/trailing/repeated whitespace.
+    tokens = without_articles.split()
+
+    return " ".join(tokens)
+
+
+def f1(items):
+    """
+    Mean token-level F1 over all items (after the official SQuAD v1.1 script).
+
+    :param items: iterable of (gold, prediction) string pairs.
+    :return: average of the per-pair F1 scores computed on normalized tokens.
+    """
+    unzipped_list = list(zip(*items))
+    golds, preds = unzipped_list[0], unzipped_list[1]
+
+    f1_list = []
+    # Walk gold/prediction pairs in lockstep rather than by index.
+    for gold, pred in zip(golds, preds):
+        prediction_tokens = normalize_answer(pred).split()
+        references_tokens = normalize_answer(gold).split()
+        # Multiset intersection: shared tokens, counted with multiplicity.
+        common = Counter(prediction_tokens) & Counter(references_tokens)
+        num_same = sum(common.values())
+        if num_same == 0:
+            f1_list.append(0)  # no overlap, which also covers empty token lists
+            continue
+        precision = num_same / len(prediction_tokens)
+        recall = num_same / len(references_tokens)
+        f1_list.append((2 * precision * recall) / (precision + recall))
+
+    return sum(f1_list) / len(f1_list)
diff --git a/lm_eval/tasks/afrobench/afriqa/utils.py b/lm_eval/tasks/afrobench/afriqa/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fef58f013ff9d31da0a952e1315cc09b53c2e74
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afriqa/utils.py
@@ -0,0 +1,125 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # plain holder that wraps a single value
+    def __init__(self, value):
+        self.value = value  # stored as given; no other code in this module reads it
+
+
+def prompt_func(mode, lang):
+    """
+    Return the ``doc_to_text`` template for one AfriQA prompt variant.
+
+    :param mode: one of "prompt_1" ... "prompt_5"; any other value raises KeyError.
+    :param lang: language name, interpolated only into the "prompt_2" wording.
+    :return: the instruction text followed by the Question/Context/Answer scaffold.
+
+    NOTE(review): the two instruction sentences are joined with no space
+    between them (e.g. "...a context.Make sure..."); confirm that is intended.
+    """
+    # Every variant ends with this scaffold, so it is spelled out once.
+    scaffold = "Question: {{question_lang}}\nContext: {{context}}\nAnswer:"
+
+    # Instruction text per variant; adjacent literals concatenate with no separator.
+    instructions = {
+        "prompt_1": "Your task is to answer a question given a context."
+        "Make sure you respond with the shortest span containing the answer in the context.\n",
+        "prompt_2": f"Your task is to answer a question given a context. The question is in {lang}, while the context is in English or French."
+        "Make sure you respond with the shortest span in the context that contains the answer.\n",
+        "prompt_3": "Given the context, provide the answer to the following question."
+        "Ensure your response is concise and directly from the context.\n",
+        "prompt_4": "You are an AI assistant and your task is to answer the question based on the provided context."
+        "Your answer should be the shortest span that contains the answer within the context.\n",
+        "prompt_5": "Using the context, find the answer to the question."
+        "Respond with the briefest span that includes the answer from the context.\n",
+    }
+
+    return instructions[mode] + scaffold
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language under ``<output_dir>/<mode>/``.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant ("prompt_1" ... "prompt_5"); also the sub-directory name.
+    :raises FileExistsError: if ``overwrite`` is false and any target file exists
+        (raised after all languages were attempted, listing the skipped files).
+    """
+    err = []
+    languages = {
+        "bem": "Bemba",
+        "fon": "Fon",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kin": "Kinyarwanda",
+        "swa": "Swahili",
+        "twi": "Twi",
+        "wol": "Wolof",
+        "yor": "Yoruba",
+        "zul": "Zulu",
+    }
+
+    # Build the directory once and derive every file path from it, so the
+    # directory that is created is always the one the files are opened in.
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        file_name = f"afriqa_{lang}.yaml"
+        yaml_details = {
+            "include": "afriqa",
+            "task": f"afriqa_{lang}_{mode}",
+            "dataset_name": lang,
+            "doc_to_text": prompt_func(mode, lang_name),
+        }
+        try:
+            # Mode "x" makes open() raise FileExistsError instead of clobbering.
+            with open(
+                os.path.join(target_dir, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        # store_true + default=True could never be False; this adds --no-overwrite.
+        action=argparse.BooleanOptionalAction,
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":  # run the generator only when executed as a script
+    main()
diff --git a/lm_eval/tasks/afrobench/afrisenti/README.md b/lm_eval/tasks/afrobench/afrisenti/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..99bd489e3eb2cd99eb888a0c2903a4c6259668df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/README.md
@@ -0,0 +1,58 @@
+# AfriSenti
+
+## Paper
+Title: `AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages`
+
+Paper Link: https://aclanthology.org/2023.emnlp-main.862/
+
+## Abstract
+>Africa is home to over 2,000 languages from over six language families and has the highest linguistic diversity among all continents. This includes 75 languages with at least one million speakers each. Yet, there is little NLP research conducted on African languages. Crucial in enabling such research is the availability of high-quality annotated datasets. In this paper, we introduce AfriSenti, a sentiment analysis benchmark that contains a total of >110,000 tweets in 14 African languages (Amharic, Algerian Arabic, Hausa, Igbo, Kinyarwanda, Moroccan Arabic, Mozambican Portuguese, Nigerian Pidgin, Oromo, Swahili, Tigrinya, Twi, Xitsonga, and Yoruba) from four language families. The tweets were annotated by native speakers and used in the AfriSenti-SemEval shared task (with over 200 participants, see website: https://afrisenti-semeval.github.io). We describe the data collection methodology, annotation process, and the challenges we dealt with when curating each dataset. We further report baseline experiments conducted on the AfriSenti datasets and discuss their usefulness.
+
+HomePage: https://github.com/afrisenti-semeval/afrisent-semeval-2023
+
+### Citation
+
+```
+@inproceedings{muhammad-etal-2023-afrisenti,
+    title = "{A}fri{S}enti: A {T}witter Sentiment Analysis Benchmark for {A}frican Languages",
+    author = "Muhammad, Shamsuddeen Hassan  and
+      Abdulmumin, Idris  and
+      Ayele, Abinew Ali  and
+      Ousidhoum, Nedjma  and
+      Adelani, David Ifeoluwa  and
+      Yimam, Seid Muhie  and
+      Ahmad, Ibrahim Sa'id  and
+      Beloucif, Meriem  and
+      Mohammad, Saif M.  and
+      Ruder, Sebastian  and
+      Hourrane, Oumaima  and
+      Brazdil, Pavel  and
+      Jorge, Alipio  and
+      Ali, Felermino D{\'a}rio M{\'a}rio Ant{\'o}nio  and
+      David, Davis  and
+      Osei, Salomey  and
+      Shehu Bello, Bello  and
+      Ibrahim, Falalu  and
+      Gwadabe, Tajuddeen  and
+      Rutunda, Samuel  and
+      Belay, Tadesse  and
+      Messelle, Wendimu Baye  and
+      Balcha, Hailu Beshada  and
+      Chala, Sisay Adugna  and
+      Gebremichael, Hagos Tesfahun  and
+      Opoku, Bernard  and
+      Arthur, Stephen",
+    editor = "Bouamor, Houda  and
+      Pino, Juan  and
+      Bali, Kalika",
+    booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
+    month = dec,
+    year = "2023",
+    address = "Singapore",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.emnlp-main.862/",
+    doi = "10.18653/v1/2023.emnlp-main.862",
+    pages = "13968--13981",
+    abstract = "Africa is home to over 2,000 languages from over six language families and has the highest linguistic diversity among all continents. This includes 75 languages with at least one million speakers each. Yet, there is little NLP research conducted on African languages. Crucial in enabling such research is the availability of high-quality annotated datasets. In this paper, we introduce AfriSenti, a sentiment analysis benchmark that contains a total of {\ensuremath{>}}110,000 tweets in 14 African languages (Amharic, Algerian Arabic, Hausa, Igbo, Kinyarwanda, Moroccan Arabic, Mozambican Portuguese, Nigerian Pidgin, Oromo, Swahili, Tigrinya, Twi, Xitsonga, and Yoruba) from four language families. The tweets were annotated by native speakers and used in the AfriSenti-SemEval shared task (with over 200 participants, see website: https://afrisenti-semeval.github.io). We describe the data collection methodology, annotation process, and the challenges we dealt with when curating each dataset. We further report baseline experiments conducted on the AfriSenti datasets and discuss their usefulness."
+}
+```
diff --git a/lm_eval/tasks/afrobench/afrisenti/afrisenti.yaml b/lm_eval/tasks/afrobench/afrisenti/afrisenti.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36a1efdb3033e70060251e346847b73fd9de2f60
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/afrisenti.yaml
@@ -0,0 +1,13 @@
+group: afrisenti
+task:
+  - afrisenti_prompt_1
+  - afrisenti_prompt_2
+  - afrisenti_prompt_3
+  - afrisenti_prompt_4
+  - afrisenti_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/afrisenti/fewshot.sh b/lm_eval/tasks/afrobench/afrisenti/fewshot.sh
new file mode 100644
index 0000000000000000000000000000000000000000..428d455b65ac917efee1810a68626f36e777e2d9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/fewshot.sh
@@ -0,0 +1,109 @@
+lm_eval --model hf \
+        --model_args pretrained=masakhane/African-ultrachat-alpaca  \
+        --tasks afrimmlu_direct_amh,afrimmlu_direct_eng,afrimmlu_direct_ewe,afrimmlu_direct_fra,afrimmlu_direct_hau,afrimmlu_direct_ibo,afrimmlu_direct_kin,afrimmlu_direct_lin,afrimmlu_direct_lug,afrimmlu_direct_orm,afrimmlu_direct_sna,afrimmlu_direct_sot,afrimmlu_direct_twi,afrimmlu_direct_wol,afrimmlu_direct_xho,afrimmlu_direct_yor,afrimmlu_direct_zul   \
+        --device cuda:0     \
+        --batch_size 1 \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --wandb_args project=afrimmlu
+
+
+lm_eval --model hf \
+        --model_args pretrained=bigscience/mt0-small,parallelize=true \
+        --tasks afrisenti_amh_prompt_1,afrisenti_arq_prompt_1,afrisenti_ary_prompt_1,afrisenti_hau_prompt_1,afrisenti_ibo_prompt_1,afrisenti_kin_prompt_1,afrisenti_orm_prompt_1,afrisenti_pcm_prompt_1,afrisenti_por_prompt_1,afrisenti_swa_prompt_1,afrisenti_tir_prompt_1,afrisenti_tso_prompt_1,afrisenti_twi_prompt_1,afrisenti_yor_prompt_1\
+        --device cuda:0     \
+        --batch_size 1 \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 5
+
+
+lm_eval --model hf \
+        --model_args pretrained=bigscience/mt0-xxl,parallelize=true  \
+        --tasks afrisenti_amh_prompt_1,afrisenti_arq_prompt_1,afrisenti_ary_prompt_1,afrisenti_hau_prompt_1,afrisenti_ibo_prompt_1,afrisenti_kin_prompt_1,afrisenti_orm_prompt_1,afrisenti_pcm_prompt_1,afrisenti_por_prompt_1,afrisenti_swa_prompt_1,afrisenti_tir_prompt_1,afrisenti_tso_prompt_1,afrisenti_twi_prompt_1,afrisenti_yor_prompt_1\
+        --batch_size 128 \
+        --num_fewshot 0 \
+        --verbosity DEBUG
+
+lm_eval --model hf \
+        --model_args pretrained=google/gemma-2-27b-it,parallelize=true,trust_remote_code=True \
+        --tasks afriqa_wol_prompt_2\
+        --batch_size 1 \
+        --device 'cuda' \
+        --num_fewshot 5 \
+        --verbosity DEBUG \
+        --output_path './afriqa_results/' \
+        --log_samples
+
+lm_eval --model vllm \
+        --model_args pretrained=meta-llama/Llama-2-7b-chat-hf,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.8,data_parallel_size=1 \
+        --tasks masakhapos_pcm_prompt_1,masakhapos_pcm_prompt_2,masakhapos_pcm_prompt_3,masakhapos_pcm_prompt_4,masakhapos_pcm_prompt_5 \
+        --batch_size 'auto' \
+        --device 'cuda' \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 2
+
+
+lm_eval --model vllm \
+        --model_args pretrained=meta-llama/Llama-2-7b-chat-hf,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.8,data_parallel_size=1 \
+        --tasks masakhapos_pcm_prompt_1,masakhapos_pcm_prompt_2,masakhapos_pcm_prompt_3,masakhapos_bam_prompt_2,masakhapos_bbj_prompt_3 \
+        --batch_size 'auto' \
+        --device 'cuda' \
+        --num_fewshot 0 \
+        --verbosity DEBUG
+
+lm_eval --model vllm \
+        --model_args pretrained=google/gemma-1.1-7b-it,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.8,data_parallel_size=1 \
+        --tasks masakhaner_pcm_prompt_1\
+        --batch_size 'auto' \
+        --device 'cuda' \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 5
+
+lm_eval --model vllm \
+        --model_args pretrained=google/gemma-2-9b-it,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.8,data_parallel_size=1 \
+        --tasks masakhaner_pcm_prompt_1,masakhaner_pcm_prompt_2,masakhaner_pcm_prompt_3,masakhaner_pcm_prompt_4,masakhaner_pcm_prompt_5\
+        --batch_size 'auto' \
+        --device 'cuda' \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 5
+
+lm_eval --model vllm \
+        --model_args pretrained=google/gemma-1.1-7b-it,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.8,data_parallel_size=1 \
+        --tasks flores_eng_Latn-fuv_Latn_prompt_1,flores_eng_Latn-fuv_Latn_prompt_2,flores_eng_Latn-fuv_Latn_prompt_3,flores_fuv_Latn-eng_Latn_prompt_1,flores_fuv_Latn-eng_Latn_prompt_2,flores_fuv_Latn-eng_Latn_prompt_3 \
+        --batch_size 'auto' \
+        --device 'cuda' \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 2
+
+lm_eval --model vllm \
+        --model_args pretrained=google/gemma-2-27b-it,tensor_parallel_size=2,dtype='auto',gpu_memory_utilization=0.9,data_parallel_size=1 \
+        --tasks masakhapos_twi_prompt_3,masakhapos_wol_prompt_3,masakhapos_xho_prompt_3,masakhapos_yor_prompt_3,masakhapos_zul_prompt_3\
+        --batch_size 'auto' \
+        --num_fewshot 5 \
+        --verbosity DEBUG \
+        --output_path './masakhapos_results/' \
+        --log_samples
+
+lm_eval --model hf \
+        --model_args pretrained=bigscience/mt0-small,parallelize=true \
+        --tasks  injongointent_amh_prompt_1,injongointent_eng_prompt_1,injongointent_yor_prompt_1,injongointent_ibo_prompt_1,injongointent_wol_prompt_1\
+        --device 'mps'  \
+        --batch_size 1 \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --limit 5
+
+lm_eval --model hf \
+        --model_args pretrained=google/gemma-3-27b-it,parallelize=true \
+        --tasks  afrobench_sentiment_tasks\
+        --device 'cuda'  \
+        --batch_size 1 \
+        --num_fewshot 0 \
+        --verbosity DEBUG \
+        --output_path './senti_results/' \
+        --log_samples
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti
new file mode 100644
index 0000000000000000000000000000000000000000..69ef6b2bc08bbc198e2c6610c7c40041db4d20a4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti
@@ -0,0 +1,41 @@
+tag:
+    - afrobench_sentiment_tasks
+    - afrisenti_prompt_1
+task: null
+dataset_path: masakhane/afrisenti
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_text: 'Does this statement; "{{tweet}}" have a Neutral, Positive or Negative sentiment? Labels only'
+doc_to_target: label
+doc_to_choice:
+    - "negative"
+    - "positive"
+    - "neutral"
+should_decontaminate: true
+doc_to_decontamination_query: tweet
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_amh.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7eefbe867360070e0701a558c124ad4ad7da786a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: amh
+include: afrisenti
+task: afrisenti_amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_arq.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_arq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b2e2522d94e2d0da8cb9efec72d225c7161f8e7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_arq.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: arq
+include: afrisenti
+task: afrisenti_arq_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ary.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f9ef3f20d654a0e97e40a9dd4e3d6bd2e7d949b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ary.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ary
+include: afrisenti
+task: afrisenti_ary_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_hau.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0ab9071abbc0211b8048743db43347bb5df1583
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: hau
+include: afrisenti
+task: afrisenti_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ibo.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0176d08764dae9a5fd8af57dc903b6b55ab0124
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: ibo
+include: afrisenti
+task: afrisenti_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_kin.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75bb717a6e22d931404d2c7cfc919cfa8f99453d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: kin
+include: afrisenti
+task: afrisenti_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_orm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65c63b06fbb721d59033e5f748c6899270e92831
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_orm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: orm
+include: afrisenti
+task: afrisenti_orm_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_pcm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f24fe9fc01cc162294aa1b387676591450b2d39
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_pcm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: pcm
+include: afrisenti
+task: afrisenti_pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_por.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e1b4cd60a1533ab6804624c8e967b46c37a69be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_por.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: por
+include: afrisenti
+task: afrisenti_por_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_swa.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3386948ccf5eef56d293cc39d0811ab06e1a5127
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: swa
+include: afrisenti
+task: afrisenti_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tir.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4942628e8115f58a047d7819052b27cc50883e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tir.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: tir
+include: afrisenti
+task: afrisenti_tir_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tso.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d500693270946b6581020bc68a38612bdfd4f033
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_tso.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: tso
+include: afrisenti
+task: afrisenti_tso_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_twi.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a68bb23dcaeedcc6c97768c42a1e49c26b425e40
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: twi
+include: afrisenti
+task: afrisenti_twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_yor.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fda98c2c82c6e323eae8c96843e657e07a4d9665
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/afrisenti_yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: yor
+include: afrisenti
+task: afrisenti_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/run.sh b/lm_eval/tasks/afrobench/afrisenti/prompt_1/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..50d1a1338f87330219dd4c6f79fa85ef918bb21c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/run.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Evaluate every afrisenti prompt_1 language task with each model below,
+# once with 0 and once with 5 few-shot examples. Results are written to
+# results/<num_fewshot>/ relative to the current directory.
+
+models=(
+
+  "google/gemma-1.1-7b-it"
+  "CohereForAI/aya-101"
+  "meta-llama/Llama-2-7b-chat-hf"
+  "meta-llama/Meta-Llama-3-8B-Instruct"
+  "google/gemma-2-9b-it"
+  "bigscience/mt0-xxl"
+  "google/gemma-2-27b-it"
+  "meta-llama/Meta-Llama-3-70B-Instruct"
+)
+# One task per language config in this directory (14 in total, including orm).
+task=afrisenti_amh_prompt_1,afrisenti_arq_prompt_1,afrisenti_ary_prompt_1,afrisenti_hau_prompt_1,afrisenti_ibo_prompt_1,afrisenti_kin_prompt_1,afrisenti_orm_prompt_1,afrisenti_pcm_prompt_1,afrisenti_por_prompt_1,afrisenti_swa_prompt_1,afrisenti_tir_prompt_1,afrisenti_tso_prompt_1,afrisenti_twi_prompt_1,afrisenti_yor_prompt_1
+
+for model in "${models[@]}"
+do
+  echo "Evaluating model: $model"
+  for fewshot in 0 5
+  do
+    export OUTPUT_DIR="results/$fewshot"
+
+    mkdir -p "$OUTPUT_DIR"
+
+    # Quote the task list so it always reaches lm_eval as a single argument.
+    lm_eval --model hf \
+            --model_args "pretrained=${model}" \
+            --tasks "$task" \
+            --device cuda:0 \
+            --batch_size 16 \
+            --output_path "$OUTPUT_DIR" \
+            --num_fewshot "$fewshot" \
+            --verbosity DEBUG
+  done
+done
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/utils.py b/lm_eval/tasks/afrobench/afrisenti/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_1/xx.py b/lm_eval/tasks/afrobench/afrisenti/prompt_1/xx.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca0e325e526e33dfd19ba03a93652244977fc119
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_1/xx.py
@@ -0,0 +1,13 @@
+from datasets import load_dataset
+
+
+# Scratch script: load one language config of masakhane/afrisenti and print it.
+# Language codes (they match the per-language YAML configs in this directory):
+# ['amh', 'hau', 'ibo', 'arq', 'ary', 'yor', 'por', 'twi', 'tso', 'tir', 'orm', 'pcm', 'kin', 'swa']
+
+data = load_dataset("masakhane/afrisenti", "pcm", trust_remote_code=True)
+print(data)
+print(data["test"][:5])  # first five rows of the test split
+#
+# NOTE(review): the sample below (a token list followed by a list of integer
+# ids) does not look like the `tweet`/`label` fields used by the AfriSenti
+# configs; presumably leftover output from another dataset — confirm or remove.
+# ['Naija', 'Pipo', 'wey', 'dey', 'for', 'inside', 'social', 'Media', 'sef', 'don', 'put', 'hand', 'for', 'ear', 'give',
+#  'federal', 'goment', 'and', 'polical', 'leader', 'dem', 'ova', 'di', 'kilin', '.']
+#
+# [6, 0, 14, 17, 2, 2, 6, 0, 7, 17, 16, 0, 2, 0, 16, 0, 0, 9, 0, 0, 11, 2, 8, 0, 1]
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti
new file mode 100644
index 0000000000000000000000000000000000000000..879f2826c3f26025fcb5e41342f86ef3f9c6c677
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti
@@ -0,0 +1,39 @@
+tag:
+    - afrobench_sentiment_tasks
+    # Must be spelled `afrisenti_prompt_2`: that is the name the `afrisenti` group config lists.
+    - afrisenti_prompt_2
+dataset_path: masakhane/afrisenti
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: label
+doc_to_choice:
+    - "negative"
+    - "positive"
+    - "neutral"
+should_decontaminate: true
+# The decontamination query is the raw `tweet` field (same as the prompt_1 config), not a formatted prompt string.
+doc_to_decontamination_query: tweet
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_amh.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d97b2c25787d9338546dba3707afcda31ad31269
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_amh.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: Does this Amharic statement; '{{tweet}}' have a Neutral, Positive or
+  Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_arq.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_arq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c61e310dee3a9f97557aaaa8d465ce329ba29610
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_arq.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: arq
+doc_to_text: Does this Algerian Arabic statement; '{{tweet}}' have a Neutral, Positive
+  or Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_arq_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ary.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e76d385b3dc5ee5bbbe817336ec9d78feef7eb7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ary.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ary
+doc_to_text: Does this Moroccan Arabic statement; '{{tweet}}' have a Neutral, Positive
+  or Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_ary_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_hau.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f7b0ccb2811b30acc98fe594af33b0a38fb2a88b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: Does this Hausa statement; '{{tweet}}' have a Neutral, Positive or Negative
+  sentiment? Labels only
+include: afrisenti
+task: afrisenti_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ibo.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4d6c6c8094bb5b41e5488c972f2702705c9f3d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: Does this Igbo statement; '{{tweet}}' have a Neutral, Positive or Negative
+  sentiment? Labels only
+include: afrisenti
+task: afrisenti_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_kin.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5067b9fb75cd1579a8bc915a3a010696ca60b177
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_kin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: Does this Kinyarwanda statement; '{{tweet}}' have a Neutral, Positive
+  or Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_orm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8abbbfbd73687a4249238a3f2ad988b85634531
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: Does this Oromo statement; '{{tweet}}' have a Neutral, Positive or Negative
+  sentiment? Labels only
+include: afrisenti
+task: afrisenti_orm_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_pcm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4dd98925299e9b872d49bdb9bea0ebd2b48d1ec7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_pcm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: Does this Nigerian Pidgin statement; '{{tweet}}' have a Neutral, Positive
+  or Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_por.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b8beecff946f4764becafb103f890ea924926a9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_por.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: por
+doc_to_text: Does this Mozambique Portuguese statement; '{{tweet}}' have a Neutral,
+  Positive or Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_por_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_swa.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..496da1a1d1e2b4fcfa004e918af85e7321a1ed29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_swa.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: Does this Swahili statement; '{{tweet}}' have a Neutral, Positive or
+  Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tir.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3899c992ed3180d76f2ff677c148026da6d5e9a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tir.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: Does this Tigrinya statement; '{{tweet}}' have a Neutral, Positive or
+  Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_tir_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tso.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b371b7479489bd5387d2d18cd7f2edab8496dc00
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_tso.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tso
+doc_to_text: Does this Xithonga statement; '{{tweet}}' have a Neutral, Positive or
+  Negative sentiment? Labels only
+include: afrisenti
+task: afrisenti_tso_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_twi.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c985efc4d32d30ae7f18ed5aac13c97d4dbe112b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: Does this Twi statement; '{{tweet}}' have a Neutral, Positive or Negative
+  sentiment? Labels only
+include: afrisenti
+task: afrisenti_twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_yor.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78932ed4cfe5f88bc91a6a0d26eb8c33a71c1ecb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/afrisenti_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: Does this Yoruba statement; '{{tweet}}' have a Neutral, Positive or Negative
+  sentiment? Labels only
+include: afrisenti
+task: afrisenti_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/run.sh b/lm_eval/tasks/afrobench/afrisenti/prompt_2/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..48797912512124c9c5287dcdd654e5fa04a029b0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/run.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Evaluate the afrisenti prompt_2 language tasks with each model below,
+# once with 0 and once with 5 few-shot examples. Results are written to
+# ./results/<num_fewshot>/. The run keeps --limit 2.
+
+models=(
+
+  "google/gemma-1.1-7b-it"
+  "CohereForAI/aya-101"
+  "meta-llama/Llama-2-7b-chat-hf"
+  "meta-llama/Meta-Llama-3-8B-Instruct"
+  "google/gemma-2-9b-it"
+  "bigscience/mt0-xxl"
+  "google/gemma-2-27b-it"
+  "meta-llama/Meta-Llama-3-70B-Instruct"
+)
+# One task per language config in this directory, mirroring prompt_1/run.sh.
+task=afrisenti_amh_prompt_2,afrisenti_arq_prompt_2,afrisenti_ary_prompt_2,afrisenti_hau_prompt_2,afrisenti_ibo_prompt_2,afrisenti_kin_prompt_2,afrisenti_orm_prompt_2,afrisenti_pcm_prompt_2,afrisenti_por_prompt_2,afrisenti_swa_prompt_2,afrisenti_tir_prompt_2,afrisenti_tso_prompt_2,afrisenti_twi_prompt_2,afrisenti_yor_prompt_2
+
+for model in "${models[@]}"
+do
+  echo "Evaluating model: $model"
+  for fewshot in 0 5
+  do
+    export OUTPUT_DIR="./results/$fewshot"
+
+    mkdir -p "$OUTPUT_DIR"
+
+    # --model_args is a comma-separated list of key=value pairs, hence
+    # `parallelize=true` rather than `parallelize: true`.
+    lm_eval --model hf \
+            --model_args "pretrained=${model},parallelize=true" \
+            --tasks "$task" \
+            --batch_size 256 \
+            --output_path "$OUTPUT_DIR" \
+            --num_fewshot "$fewshot" \
+            --verbosity DEBUG \
+            --limit 2
+  done
+done
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/utils.py b/lm_eval/tasks/afrobench/afrisenti/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_2/xx.py b/lm_eval/tasks/afrobench/afrisenti/prompt_2/xx.py
new file mode 100644
index 0000000000000000000000000000000000000000..4aa0db7af761fd7ea8858383b4564130a374f223
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_2/xx.py
@@ -0,0 +1,5 @@
+from datasets import load_dataset
+
+
+# Scratch check: load the "yor" config of HausaNLP/AfriSenti-Twitter and print it.
+# NOTE(review): the task configs in this directory read `masakhane/afrisenti`
+# instead — confirm which dataset source is intended.
+data = load_dataset("HausaNLP/AfriSenti-Twitter", "yor", trust_remote_code=True)
+print(data)
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti
new file mode 100644
index 0000000000000000000000000000000000000000..53cb77771f2cc6622fa4c67ea5ea20485df761d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti
@@ -0,0 +1,39 @@
+tag:
+    - afrobench_sentiment_tasks
+    - afrisenti_prompt_3
+dataset_path: masakhane/afrisenti
+dataset_name: null  # every per-language config that includes this file sets its own value
+dataset_kwargs: {trust_remote_code: true}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: label
+doc_to_choice:
+    - "negative"
+    - "positive"
+    - "neutral"
+should_decontaminate: true
+doc_to_decontamination_query: "text: {{tweet}} \nlabel: "
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # weighted_f1_score is re-exported from lm_eval.utils by the utils.py next to this file.
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_amh.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2645b72befa6b0048986f68aa14b4d5ed60027dc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Amharic (amh) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_amh_prompt_3
+include: afrisenti
+dataset_name: amh
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Amharic statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_arq.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_arq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b90f690e93f249e8c4668bb74a667ff19a39247
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_arq.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py. Algerian Arabic (arq) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_arq_prompt_3
+include: afrisenti
+dataset_name: arq
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the\
+  \ Algerian Arabic statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ary.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba11ee3e5146db8bfef7680becfb95a6f0a9b6aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ary.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py. Moroccan Arabic (ary) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_ary_prompt_3
+include: afrisenti
+dataset_name: ary
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the\
+  \ Moroccan Arabic statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_hau.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f4e6b3fb3252929fcd2d0f30240ca8d4553a009
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Hausa (hau) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_hau_prompt_3
+include: afrisenti
+dataset_name: hau
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Hausa statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ibo.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbcc88d70447a6257d1acefa94b65336d6b330c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Igbo (ibo) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_ibo_prompt_3
+include: afrisenti
+dataset_name: ibo
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Igbo statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_kin.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52d84b2684f9a071c0dc6b5889ad790e3116b0fc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Kinyarwanda (kin) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_kin_prompt_3
+include: afrisenti
+dataset_name: kin
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Kinyarwanda statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_orm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2d524bfd781b37cb156ce09fd7fc5aae493392b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Oromo (orm) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_orm_prompt_3
+include: afrisenti
+dataset_name: orm
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Oromo statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_pcm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb0ac8ff3bc1b3e0c8efa9fe9afdc40ea0c0690a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_pcm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py. Nigerian Pidgin (pcm) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_pcm_prompt_3
+include: afrisenti
+dataset_name: pcm
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the\
+  \ Nigerian Pidgin statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_por.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..821a4355b044844d3608c01950b949ce5f292ba2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_por.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py. Mozambique Portuguese (por) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_por_prompt_3
+include: afrisenti
+dataset_name: por
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the\
+  \ Mozambique Portuguese statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_swa.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8e92842e01b61cc815fb48f7a390c6f13587e18
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Swahili (swa) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_swa_prompt_3
+include: afrisenti
+dataset_name: swa
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Swahili statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tir.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0f96c24a290d33a3c16754f3c0412ac11cb285a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tir.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Tigrinya (tir) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_tir_prompt_3
+include: afrisenti
+dataset_name: tir
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Tigrinya statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tso.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8355035e963a13e2da541c65d4f778c5f0d46a58
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_tso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Xitsonga (tso) variant of AfriSenti prompt 3; language name spelling corrected from "Xithonga".
+dataset_name: tso
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\nGiven\
+  \ the sentiment labels Neutral, Positive or Negative; what is the sentiment of the\
+  \ Xitsonga statement below? Return only the labels. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_tso_prompt_3
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_twi.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..98809176e9693cd65ec711fb81739fdbe0030e70
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Twi (twi) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_twi_prompt_3
+include: afrisenti
+dataset_name: twi
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Twi statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_yor.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d1b7ac324b28b7a64220b6b318aabf9537594fc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/afrisenti_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py. Yoruba (yor) variant of AfriSenti prompt 3; shared settings come from the included base file.
+task: afrisenti_yor_prompt_3
+include: afrisenti
+dataset_name: yor
+doc_to_text: "You are an assistant able to detect sentiments in tweets. \n\n\
+  Given the sentiment labels Neutral, Positive or Negative; what is the sentiment of the Yoruba statement below? Return only the labels. \n\n\
+  text: {{tweet}} \nlabel:"
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/utils.py b/lm_eval/tasks/afrobench/afrisenti/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_3/xx.py b/lm_eval/tasks/afrobench/afrisenti/prompt_3/xx.py
new file mode 100644
index 0000000000000000000000000000000000000000..2133cfa0139de116c3d54e6c3866c5e4c26bbc53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_3/xx.py
@@ -0,0 +1,5 @@
+from datasets import load_dataset
+
+if __name__ == "__main__":  # manual check only; importing this module no longer triggers a download
+    data = load_dataset("masakhane/afrisenti", "por", trust_remote_code=True)
+    print(data)  # print the loaded object for the "por" configuration
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti
new file mode 100644
index 0000000000000000000000000000000000000000..6464d7b21693a1565f8479757a89a650cf84ff0c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti
@@ -0,0 +1,39 @@
+tag:
+    - afrobench_sentiment_tasks
+    - afrisenti_prompt_4
+dataset_path: masakhane/afrisenti
+dataset_name: null  # every per-language config that includes this file sets its own value
+dataset_kwargs: {trust_remote_code: true}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: label
+doc_to_choice:
+    - "negative"
+    - "positive"
+    - "neutral"
+should_decontaminate: true
+doc_to_decontamination_query: "text: {{tweet}} \nlabel: "
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # weighted_f1_score is re-exported from lm_eval.utils by the utils.py next to this file.
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_amh.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a30a72a45d38f6f4221a1e6ceaef93dd5472bbd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_amh.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: amh
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_amh_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_arq.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_arq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..125771f5d6877585bb2b9a50121da7e5a56d7805
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_arq.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: arq
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_arq_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ary.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7868fbf3e6739cd69a4732a09b80cc08359ccbe8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ary.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: ary
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_ary_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_hau.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e7e9a443e5926dfe4de5074e1416417d38b4447
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: hau
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ibo.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..686e16c29e4aad0ff13e160befeb31e5b25a7f54
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: ibo
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_kin.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7061ab63523b3b5cb34ec1bb0d35db11fdefd5d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_kin.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: kin
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_orm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f745ebb28e843d400ccba5973de312d841a2592
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_orm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: orm
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_orm_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_pcm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5071134ac6d1645650edef34ee6dde2d3bdabce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_pcm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: pcm
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_pcm_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_por.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5196bcf58303558d5a214caffde460b8675d08f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_por.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: por
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_por_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_swa.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97b9e4f1a2fc87433bf511e6e511bd9f54284d5e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_swa.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: swa
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tir.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..02dfca854e166ca3d96a570e8db126e646fbd5b5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tir.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: tir
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_tir_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tso.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa83c1378d15b8c3cedd8582025aea8ea795bb07
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_tso.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: tso
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_tso_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_twi.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b4366d1f27902651502a96adfc9c09fb8d82dfde
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_twi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: twi
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_twi_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_yor.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8394706cdfbfb1124d4c25c987901d5f338cd5f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/afrisenti_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: yor
+doc_to_text: "Label the following text as Neutral, Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/utils.py b/lm_eval/tasks/afrobench/afrisenti/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_4/xx.py b/lm_eval/tasks/afrobench/afrisenti/prompt_4/xx.py
new file mode 100644
index 0000000000000000000000000000000000000000..4515053c4265ba0b6bc9afa9f876d20ef5fc5c2a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_4/xx.py
@@ -0,0 +1,5 @@
+from datasets import load_dataset
+
+if __name__ == "__main__":  # manual check only; importing this module no longer triggers a download
+    data = load_dataset("masakhane/afrisenti", "orm", trust_remote_code=True)
+    print(data)  # print the loaded object for the "orm" configuration
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti
new file mode 100644
index 0000000000000000000000000000000000000000..5107bb80d5333a462afda9a8efb62a6fd039a733
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti
@@ -0,0 +1,39 @@
+tag:
+    - afrobench_sentiment_tasks
+    - afrisenti_prompt_5
+dataset_path: masakhane/afrisenti
+dataset_name: null  # every per-language config that includes this file sets its own value
+dataset_kwargs: {trust_remote_code: true}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: label
+doc_to_choice:
+    - "negative"
+    - "positive"
+    - "neutral"
+should_decontaminate: true
+doc_to_decontamination_query: "Text: {{tweet}} \nlabel:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # NOTE(review): expects weighted_f1_score to be importable from this directory's utils.py - confirm.
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_amh.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..866ffbe9fcab9e67a1ba4a9781dddd1ef60e8043
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_amh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: amh
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Amharic text. For each input, classify the sentiment as positive, negative, or\
+  \ neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_amh_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_arq.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_arq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..783785c031aab91f8840df8e70316ab1d4e24606
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_arq.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: arq
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Algerian Arabic text. For each input, classify the sentiment as positive, negative,\
+  \ or neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_arq_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ary.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e601dc19ddd1bf960ee56e5345bbbcc8c6b84caf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ary.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: ary
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Moroccan Arabic text. For each input, classify the sentiment as positive, negative,\
+  \ or neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_ary_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_hau.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ab2adc1aaaf15ce9dced631d20ec2efb428ace5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: hau
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Hausa text. For each input, classify the sentiment as positive, negative, or neutral.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness, satisfaction,\
+  \ or optimism. \nNegative: The text conveys disappointment, dissatisfaction, or\
+  \ pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ibo.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dba7b17964cdc320243578f9e9249a00caea4a44
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: ibo
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Igbo text. For each input, classify the sentiment as positive, negative, or neutral.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness, satisfaction,\
+  \ or optimism. \nNegative: The text conveys disappointment, dissatisfaction, or\
+  \ pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_kin.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16ea6f0c5734b4795acb06b5d854d5cb33b97c05
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: kin
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Kinyarwanda text. For each input, classify the sentiment as positive, negative,\
+  \ or neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_orm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c61ea75ee6b448a7dcf30ecdfcef7328201379ae
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_orm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py. Prompt ends at "label:" (no trailing space); lm_eval supplies the delimiter before each choice.
+dataset_name: orm
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Oromo text. For each input, classify the sentiment as positive, negative, or neutral.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness, satisfaction,\
+  \ or optimism. \nNegative: The text conveys disappointment, dissatisfaction, or\
+  \ pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel:"
+include: afrisenti
+task: afrisenti_orm_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_pcm.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6de78061a142a0bd80d69619a5eba4ce3756d616
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_pcm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Nigerian Pidgin text. For each input, classify the sentiment as positive, negative,\
+  \ or neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_pcm_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_por.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48b728d5dd38bb55632fd3fb22f37fbcbeb66eea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_por.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: por
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Mozambique Portuguese text. For each input, classify the sentiment as positive,\
+  \ negative, or neutral. Use the following guidelines: \n\n Positive: The text expresses\
+  \ happiness, satisfaction, or optimism. \nNegative: The text conveys disappointment,\
+  \ dissatisfaction, or pessimism. \nNeutral: The text is factual, objective, or without\
+  \ strong emotional undertones. \n\nIf the text contains both positive and negative\
+  \ sentiments, choose the dominant sentiment. For ambiguous or unclear sentiments,\
+  \ select the label that best reflects the overall tone. Please provide a single\
+  \ classification for each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_por_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_swa.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fee357ab47703dea57fe9b78a2d46da0f0212874
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Swahili text. For each input, classify the sentiment as positive, negative, or\
+  \ neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tir.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47a67e1c9358682be41792e77ef9e85fb1735baf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tir.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Tigrinya text. For each input, classify the sentiment as positive, negative, or\
+  \ neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_tir_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tso.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f5705285e81a7d998dd77b44c9221697aaea435
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_tso.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tso
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Xithonga text. For each input, classify the sentiment as positive, negative, or\
+  \ neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_tso_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_twi.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0b4fe03da64d5f1940dc9aeebe29de2cea09227
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Twi text. For each input, classify the sentiment as positive, negative, or neutral.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness, satisfaction,\
+  \ or optimism. \nNegative: The text conveys disappointment, dissatisfaction, or\
+  \ pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_twi_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_yor.yaml b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b96edb4104e85d01642b2ceca14fc59f2c296ecd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/afrisenti_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Yoruba text. For each input, classify the sentiment as positive, negative, or\
+  \ neutral. Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \nNeutral: The text is factual, objective, or without strong emotional\
+  \ undertones. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{tweet}} \nlabel: "
+include: afrisenti
+task: afrisenti_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/utils.py b/lm_eval/tasks/afrobench/afrisenti/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/afrisenti/prompt_5/xx.py b/lm_eval/tasks/afrobench/afrisenti/prompt_5/xx.py
new file mode 100644
index 0000000000000000000000000000000000000000..375facffa5030cdde562a9ab4474193a9d45f597
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/prompt_5/xx.py
@@ -0,0 +1,8 @@
+# Scratch GPU check: reports whether PyTorch can use CUDA and how many devices it sees.
+# Old dataset probe: load_dataset('HausaNLP/AfriSenti-Twitter', 'yor', trust_remote_code=True)
+import torch
+
+
+if __name__ == "__main__":  # keep the prints from firing if this file is ever imported
+    print(torch.cuda.is_available())  # True only when a CUDA device is usable
+    print(torch.cuda.device_count())
diff --git a/lm_eval/tasks/afrobench/afrisenti/utils.py b/lm_eval/tasks/afrobench/afrisenti/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5f9b74e2eb12db6a985c8428830933f8adcc936
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrisenti/utils.py
@@ -0,0 +1,124 @@
+import argparse
+
+import yaml
+
+
+class FunctionTag:  # Plain holder for one value; nothing else in this utils.py uses it.
+    def __init__(self, value):
+        self.value = value  # kept exactly as passed in
+
+
+def prompt_func(mode, lang):
+    """Return the doc_to_text template for `mode` (KeyError if unknown); `lang` fills prompts 2, 3, 5."""
+    suffix = "\n\ntext: {{tweet}} \nlabel:"  # plain literal, so the doubled braces survive as-is
+    templates = {
+        "prompt_1": "Does this statement; {{tweet}} have a Neutral, Positive or Negative sentiment? Labels only",
+        "prompt_2": f"Does this {lang} statement; "
+        + "'{{tweet}}' have a Neutral, Positive or Negative sentiment? Labels only",
+        "prompt_3": "You are an assistant able to detect sentiments in tweets. \n\nGiven the sentiment "
+        + f"labels Neutral, Positive or Negative; what is the sentiment of the {lang} statement below? "
+        + "Return only the labels. " + suffix,
+        "prompt_4": "Label the following text as Neutral, Positive, or Negative. "
+        + "Provide only the label as your response. " + suffix + " ",
+        "prompt_5": f"You are tasked with performing sentiment classification on the following {lang} text. "
+        + "For each input, classify the sentiment as positive, negative, or neutral. Use the following guidelines: \n\n "
+        + "Positive: The text expresses happiness, satisfaction, or optimism. \n"
+        + "Negative: The text conveys disappointment, dissatisfaction, or pessimism. \n"
+        + "Neutral: The text is factual, objective, or without strong emotional undertones. \n\n"
+        + "If the text contains both positive and negative sentiments, choose the dominant sentiment. "
+        + "For ambiguous or unclear sentiments, select the label that best reflects the overall tone. "
+        + "Please provide a single classification for each input." + suffix + " ",
+    }
+    return templates[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate one ``afrisenti_<lang>.yaml`` task config per language.
+
+    Files go to ``<output_dir>/<mode>/``; that directory is created if missing.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant ("prompt_1" .. "prompt_5"); also the sub-directory name.
+    :raises FileExistsError: after the loop, if ``overwrite`` is false and at least
+        one target already existed (the remaining files are still written).
+    """
+    import os  # function-scope import: the module itself only imports argparse and yaml
+
+    err = []
+    languages = {
+        "amh": "Amharic",
+        "arq": "Algerian Arabic",
+        "ary": "Moroccan Arabic",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kin": "Kinyarwanda",
+        "orm": "Oromo",
+        "pcm": "Nigerian Pidgin",
+        "por": "Mozambique Portuguese",
+        "swa": "Swahili",
+        "tir": "Tigrinya",
+        "tso": "Xithonga",
+        "twi": "Twi",
+        "yor": "Yoruba",
+    }
+    # Only prompts 2+ carry their own doc_to_text; prompt_1 presumably takes it from the
+    # included "afrisenti" base config (not verifiable from this module).
+    custom_prompt = int(mode.split("_")[-1]) > 1
+    target_dir = f"{output_dir}/{mode}"
+    os.makedirs(target_dir, exist_ok=True)  # open() will not create missing parent folders
+    for lang, lang_name in languages.items():
+        file_name = f"afrisenti_{lang}.yaml"
+        yaml_details = {
+            "include": "afrisenti",
+            "task": f"afrisenti_{lang}_{mode}",
+            "dataset_name": lang,
+        }
+        if custom_prompt:
+            yaml_details["doc_to_text"] = prompt_func(mode, lang_name)
+        try:
+            with open(
+                f"{target_dir}/{file_name}",
+                "w" if overwrite else "x",  # "x" raises FileExistsError instead of clobbering
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse --overwrite/--output-dir/--mode and generate the language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # off unless the flag is passed; a True default makes store_true a no-op
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":  # run the generator only when executed as a script
+    main()
diff --git a/lm_eval/tasks/afrobench/afrobench-lite.yaml b/lm_eval/tasks/afrobench/afrobench-lite.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a23c050a2d09646492778e80dbc3a30dc281f580
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrobench-lite.yaml
@@ -0,0 +1,15 @@
+group: afrobench_lite
+task:
+  - afrimgsm_cot_tasks
+  - afrimmlu_tasks
+  - afrixnli_tasks
+  - belebele_tasks
+  - sib_tasks
+  - african_flores_tasks
+  - injongointent_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/afrobench.yaml b/lm_eval/tasks/afrobench/afrobench.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52234bef5cde6b5695fa6510019bcf37502ddd40
--- /dev/null
+++ b/lm_eval/tasks/afrobench/afrobench.yaml
@@ -0,0 +1,23 @@
+group: afrobench
+task:
+#  - adr_tasks
+##  - afrihate_tasks #dataset not publicly available yet
+#  - afrimgsm_cot_tasks
+#  - afrixnli_tasks
+#  - afrobench_xqa_tasks
+#  - afrobench_sentiment_tasks
+  - afrobench_MT_tasks
+#  - afrobench_TC_tasks
+#  - afrobench_mmlu_tasks
+#  - injongointent_tasks
+#  - masakhaner_tasks
+#  - masakhapos_tasks
+#  - RC_tasks
+#  - uhura_arc_easy_tasks
+#  - xlsum_tasks
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/belebele/README.md b/lm_eval/tasks/afrobench/belebele/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..10d46a44e2098e7e8aaadf57dbdfe5eb52156d7a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/README.md
@@ -0,0 +1,41 @@
+# Belebele
+
+## Paper
+Title: `The Belebele Benchmark: a Parallel Reading Comprehension Dataset in 122 Language Variants`
+
+Paper Link: https://aclanthology.org/2024.acl-long.44/
+
+## Abstract
+>Belebele is a multiple-choice machine reading comprehension (MRC) dataset spanning 122 language variants. This dataset enables the evaluation of mono- and multi-lingual models in high-, medium-, and low-resource languages. Each question has four multiple-choice answers and is linked to a short passage from the FLORES-200 dataset. The human annotation procedure was carefully curated to create questions that discriminate between different levels of generalizable language comprehension and is reinforced by extensive quality checks. While all questions directly relate to the passage, the English dataset on its own proves difficult enough to challenge state-of-the-art language models. Being fully parallel, this dataset enables direct comparison of model performance across all languages. Belebele opens up new avenues for evaluating and analyzing the multilingual abilities of language models and NLP systems.
+
+HomePage: https://github.com/facebookresearch/belebele
+
+### Citation
+
+```
+@inproceedings{bandarkar-etal-2024-belebele,
+    title = "The Belebele Benchmark: a Parallel Reading Comprehension Dataset in 122 Language Variants",
+    author = "Bandarkar, Lucas  and
+      Liang, Davis  and
+      Muller, Benjamin  and
+      Artetxe, Mikel  and
+      Shukla, Satya Narayan  and
+      Husa, Donald  and
+      Goyal, Naman  and
+      Krishnan, Abhinandan  and
+      Zettlemoyer, Luke  and
+      Khabsa, Madian",
+    editor = "Ku, Lun-Wei  and
+      Martins, Andre  and
+      Srikumar, Vivek",
+    booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
+    month = aug,
+    year = "2024",
+    address = "Bangkok, Thailand",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2024.acl-long.44/",
+    doi = "10.18653/v1/2024.acl-long.44",
+    pages = "749--775",
+    abstract = "We present Belebele, a multiple-choice machine reading comprehension (MRC) dataset spanning 122 language variants. Significantly expanding the language coverage of natural language understanding (NLU) benchmarks, this dataset enables the evaluation of text models in high-, medium-, and low-resource languages. Each question is based on a short passage from the FLORES-200 dataset and has four multiple-choice answers. The questions were carefully curated to discriminate between models with different levels of general language comprehension. The English dataset on its own proves difficult enough to challenge state-of-the-art language models. Being fully parallel, this dataset enables direct comparison of model performance across all languages. We use this dataset to evaluate the capabilities of multilingual masked language models (MLMs) and large language models (LLMs). We present extensive results and findings, notably that despite significant cross-lingual transfer in English-centric LLMs, much smaller MLMs pretrained on balanced multilingual data still understand far more languages. Overall, Belebele opens up new avenues for evaluating and analyzing the multilingual capabilities of NLP systems."
+}
+```
diff --git a/lm_eval/tasks/afrobench/belebele/belebele.yaml b/lm_eval/tasks/afrobench/belebele/belebele.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c7d3a9dc4450ffbb152abd6eac2655c2cf2199c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/belebele.yaml
@@ -0,0 +1,13 @@
+group: belebele
+task:
+  - belebele_prompt_1
+  - belebele_prompt_2
+  - belebele_prompt_3
+  - belebele_prompt_4
+  - belebele_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele
new file mode 100644
index 0000000000000000000000000000000000000000..51553e0e077d968e1fca29e27783b225ccaf7323
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele
@@ -0,0 +1,23 @@
+tag:
+    - belebele_tasks
+    - belebele_prompt_1
+    - RC_tasks
+dataset_path: facebook/belebele
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_afr.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3a7c2b97208d5ac2ced4bbacc3192a290fa6ea2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_afr.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_afr_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_amh.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ee55e8eb291cd47abc402f7c4323d2f95caee1e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_amh.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ary.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82f0d5230d4d24a77d381be67a22dd270255fea1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ary.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_ary_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_arz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38f8c3edc09bd33823c0a221bcfe8a6d7d758d91
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_arz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_arz_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_bam.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2bc2d49f78a094ad11711baee3df33923077c89
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_bam.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_bam_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_eng.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ef1f0463d4c1b7c6661c570f00f9beb855cfd534
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_eng.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fra.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f2513826f2808d242ced3d0b7278878bb846649
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fra.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_fra_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fuv.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b24422c03aa1f81b18bab14d8810c916afb8e367
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_fuv.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_fuv_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_gaz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b999f4a85fab866f0cbc6ccab5bc78ddd7a65bc5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_gaz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_gaz_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_hau.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..933e90b50653a19529277c30e0c940130d656528
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ibo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa17935c3b331e347ddc13512b46c80e484e24d9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kea.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad535d498447fde8ee74423a80858d42a85f558c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kea.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_kea_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de957a59b3cb665760f8c36c5968ce75c5b4271c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_kin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b3247f065c8f5137f52af8965f18a2a23e942b64
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_lin_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lug.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b2ef7a14b8b1f566b1a55b6e8e02e12b327becb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_lug.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_luo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b667c1d66cff764f6d57cfa6ff698796b608e446
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_luo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_luo_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_nya.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c220c7738f369cc8255efc2b96c4d9654a5c33f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_nya.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_nya_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_plt.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5c286e8517e6348798dc9c7807b1aec687c9ad1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_plt.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_plt_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_por.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ceba2310c5e483727eb5f4386058b666cd49620c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_por.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_por_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sna.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eec0b1e19fe1bdd53a5add1d16b6b9a89e12a213
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sna.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_som.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24af555865acd5d2f8c48c5aa2814e401891ccd4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_som.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_som_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sot.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10cde5be846773e764b762157bc528a5acf3fc1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_sot.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_sot_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ssw.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..032b4629b569fc340c12fc701dbc694fd8631d95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_ssw.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_ssw_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_swa.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c4ae7b786261ef4e7b48bc35c077207a232bb31
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_swa.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tir.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b62e848daa5776384c780acce0b270cf88607c5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tir.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_tir_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tsn.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..147a1c9857be7f9557759765a7a6d5d490564bf7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tsn.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_tsn_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tso.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..869c50153d289b01ee408656701d0fb14dcb679d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_tso.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_tso_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_wol.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1aed1e5ac7954377f0e594e5e2f3b8260d140c62
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_wol.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_xho.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..549560ac1d4cd2181750d6e587f355f743d52cef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_xho.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_yor.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70c55eba1c99673595f00b297237391dd767c74c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_zul.yaml b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..257396921dbe6be6b5954170511093b382e9ba9b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_1/belebele_zul.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: 'P: {{flores_passage}}
+
+  Q: {{question.strip()}}
+
+  A: {{mc_answer1}}
+
+  B: {{mc_answer2}}
+
+  C: {{mc_answer3}}
+
+  D: {{mc_answer4}}
+
+  Please choose the correct answer from the options above:'
+include: belebele
+task: belebele_zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele
new file mode 100644
index 0000000000000000000000000000000000000000..75f673a425116056c7516d0b7e8f54844f0c9716
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele
@@ -0,0 +1,23 @@
+tag:
+    - belebele_tasks
+    - belebele_prompt_2
+    - RC_tasks
+dataset_path: facebook/belebele
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["1", "2", "3", "4"]  # must match the "1:".."4:" option labels used by the prompt_2 doc_to_text templates
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_afr.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7c7180959511b34bc8582976aa14f1b4327ad1a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_afr.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_afr_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_amh.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8f95cac2a0f27d8a1699215855167bffe3b38a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_amh.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ary.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12a784902ad61589942794fe4661375154909999
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ary.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_ary_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_arz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a975b485cd567a5f574add79bc5651e58d533219
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_arz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_arz_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_bam.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..814d32b5483290ef00380b30ce0797492f696497
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_bam.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_bam_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_eng.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..510f1fbb0d2070f3c547fbe76f3c5a69e4ba31f0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_eng.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fra.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..157433727099d0bc545ddb1e821aa80fbb37f3a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fra.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_fra_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fuv.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bf02ff0fa62983d5e4c9c8e63129b1e40da333a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_fuv.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_fuv_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_gaz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc2b2704e382ea4eaa32276e369ad43b8fa298b1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_gaz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_gaz_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_hau.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7af70e03fa6ab6f868205f2ad2b5abc4727a0dfb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ibo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92d895803b9927d2a8288a7b9ff4493f23476c77
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kea.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f1dcf9117db0c0ef3065975c7bce8abab626ae1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kea.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_kea_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e686e477dca84fe5fa1c666c553a0ac1c0efe1b5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_kin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..544eb9ddec49e0fbca3c29d56a3108db039d8979
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_lin_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lug.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe97881b310795720928c4c43241c076297903c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_lug.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_luo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7bdde48a01bd9a08b75961f43bff0438889228c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_luo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_luo_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_nya.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..212c0635342943d90922467829854dd051c72298
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_nya.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_nya_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_plt.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57e71ac9c186e22ac235f85781ffb6400e41eb11
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_plt.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_plt_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_por.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f9be02a8e500875dfc2f5b20c1d0f6d3767dce93
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_por.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_por_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sna.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a5ad43a21f7a2bd32933b47e9d5aa10a623cff3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sna.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_som.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d551d1d5c501ddc309b37b8688898359e44ecc3e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_som.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_som_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sot.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18780c9017658355fc160320784f848867bf03cc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_sot.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_sot_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ssw.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e6bd0a69ac685b18d7d03eaa2deeb120913904ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_ssw.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_ssw_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_swa.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bdfd132cd8ac856e157d2dd502f7f030e95c0c4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_swa.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tir.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1fba28cba714edf37a5169da7ba427543b1dd8e2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tir.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_tir_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tsn.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..337e08ceab543cad55a4ca0206bbf70590422642
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tsn.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_tsn_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tso.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a0e24e4a4118e26150288d0f6842ec2baeec12a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_tso.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_tso_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_wol.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..439148106cf268746b7e41695a752a9ba8422a6c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_wol.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_xho.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2077614892f77249687bf7b8889a211212325e83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_xho.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_yor.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f9684f05db11c39ea1e000f1b31ce04b3693cc01
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_zul.yaml b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c81180e17c7f71799e36536a4abcdb707ef7652d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_2/belebele_zul.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: 'Passage: {{flores_passage}}
+
+  Question: {{question.strip()}}
+
+  1: {{mc_answer1}}
+
+  2: {{mc_answer2}}
+
+  3: {{mc_answer3}}
+
+  4: {{mc_answer4}}
+
+  Please select the correct answer from the given choices:'
+include: belebele
+task: belebele_zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele
new file mode 100644
index 0000000000000000000000000000000000000000..a27ea5fb3a06cf7d949c6bc464c721a697e8d981
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele
@@ -0,0 +1,23 @@
+tag:
+    - belebele_tasks
+    - belebele_prompt_3
+    - RC_tasks
+dataset_path: facebook/belebele
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_afr.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c296cc93f26c6bb91bb7a844b22c8827ebc57fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_afr.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_afr_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_amh.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e6f2fd77b913715ca3d24c8cf209a46dca1398b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_amh.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_amh_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ary.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..203bf1c9239b45803f91c601047567aaa01c8ed9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ary.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_ary_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_arz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97f13672f57f52613728a862805273451e78b5c7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_arz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_arz_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_bam.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b5d3415a3008831a38f2f6e3abb09eb79ba072e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_bam.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_bam_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_eng.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ceb5270ec60454b3322a5058307bfce92caa3f8d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_eng.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fra.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..affc5d12fd30c95380804fdeb538a50fd6bcf582
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fra.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_fra_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fuv.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ff7bfdad8e19ae4d4f1bc3a94e6158721243b88
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_fuv.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_fuv_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_gaz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c067e7c1972edd8109312d4a313fa81d62f3effc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_gaz.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_gaz_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_hau.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..689724b4f8593af87018cef13c7c596868df7c2b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ibo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5eaacad2b56cae67a575640a79df06e87624156
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kea.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c24b2ae7a28c976c7d3980ad86b61148aa0d1635
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kea.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_kea_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae0a821fc67dd16965c9e46dc0f8724c508dc7bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_kin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93e1a5b5438d97012207f12d9bc88a639252644f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lin.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_lin_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lug.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..724947d41b746ee2507c1340003dbda425c89ddb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_lug.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_luo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21b4b8f730f3f24abaf93b0a71badee8f6c16819
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_luo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_luo_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_nya.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db045f723e91ed1e04317ad29ff7c7ee5bafa274
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_nya.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_nya_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_plt.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..946e417946c1c066f2c552d42e266fac048bf795
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_plt.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_plt_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_por.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72ca651b8c251e1dfd304c9e6312b8230c2e2b1a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_por.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_por_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sna.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f5d810ac1ec1bca42f68bf73da7a9ba10a2315a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sna.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_som.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d3a7c4e46094d0eaa70553ecf1c6ff95b557379
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_som.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_som_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sot.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3db32d81ad3f709a2889da57042411ce00c8f294
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_sot.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_sot_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ssw.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..888ecf8423b80a3ac57980a91658c0f0c6a83254
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_ssw.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_ssw_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_swa.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec8127aae7cb28ddcfae818b80c6d58bb71c70fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_swa.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tir.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab3545445999b415cc5a81864672db07f4cf548d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tir.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_tir_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tsn.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..019a95fe9a49e4c3398d01d84af7fcfc72cf1214
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tsn.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_tsn_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tso.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fcc97c4f09bc6abc5129d37ef2dabbbbcd6b71b8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_tso.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_tso_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_wol.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20af7b3c57d384f4f63461f47240d9e4cadb91e5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_wol.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_xho.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a205da905597abd079e2a8131b37e63c2d1a13f6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_xho.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_yor.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cdcbb8c244275b55670b542247d8db79c6b3bf54
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_zul.yaml b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da1ef4239b68c83c5ceaa1988a85329275db97fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_3/belebele_zul.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: 'Context: {{flores_passage}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{mc_answer1}}
+
+  Option B: {{mc_answer2}}
+
+  Option C: {{mc_answer3}}
+
+  Option D: {{mc_answer4}}
+
+  Please indicate the correct option from the list above:'
+include: belebele
+task: belebele_zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele
new file mode 100644
index 0000000000000000000000000000000000000000..cc28101b1072e9ef48202c143592df0ff2f8286b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele
@@ -0,0 +1,23 @@
+tag:
+    - belebele_tasks
+    - belebele_prompt_4
+    - RC_tasks
+dataset_path: facebook/belebele
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_afr.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..325cb85f391fb709081167cb70ea97d5664d2c86
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_afr.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_afr_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_amh.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..02eb0683fb6fdd1a603f3c7d864f58a3ada9c458
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_amh.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_amh_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ary.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c7899d23cf3bb34c56bb5c1b866397deb6d96be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ary.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_ary_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_arz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5acc3222b7bae84c471027e21afb2b6930ac7848
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_arz.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_arz_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_bam.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..466dddff4989fe2ba2e26959fd3f47d40ebba425
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_bam.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_bam_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_eng.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21dfa3ea83d46444712c98657b68cd2d4416ba1c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_eng.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_eng_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fra.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7fea6f1ba3e5792d2ebe755111c5d924ad07999
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fra.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_fra_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fuv.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77fa7798b1b079a65146976e0ecbf68ce94504cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_fuv.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_fuv_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_gaz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9e54eb9b9a74a929ecc7fff7313bb2783f7a8d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_gaz.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_gaz_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_hau.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45dbfc5730a25b7945cc06efb258da867f86c690
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_hau.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ibo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb58d8a079a7c0fd1fbeb30976361190d91f067f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ibo.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kea.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8ce83009df730072f0d40dd8090d103c2825a9d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kea.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_kea_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..028de73a190394f6c7c7de22f24060294cf1da3d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_kin.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95cad4e2f44fc85e0dd9276c0024de0f1d2be617
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lin.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_lin_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lug.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e7b6a20c34b08afb2d6f569a07a275203e48ae1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_lug.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_lug_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_luo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce5ec04ac9e580585168a7a26b7f180ccba450f9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_luo.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_luo_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_nya.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..26d2f699da8daad91726627cd37a6a8d92965faf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_nya.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_nya_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_plt.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ffdf1460a464cdc0a14459b8d48178b4246e6ed4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_plt.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_plt_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_por.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8c06382b61eaec15b027533a5553c77d2974180
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_por.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_por_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sna.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3869a67959fbff48a93174161750246ee0ccc510
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sna.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_sna_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_som.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d7be50ca6bd926924a79dc261156b7d00775966
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_som.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_som_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sot.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec30bccc32adc5a5d7a7d6789a4dfb3941d3cc4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_sot.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_sot_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ssw.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..510e7b8f2a6d7528fc8162f00bdbaa7b6a89ac4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_ssw.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_ssw_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_swa.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afbdcad238a03bf388dd4ddb070f159cf41782ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_swa.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tir.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..827f1f3614be4b1945bc8495ef52729b6c0778c0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tir.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_tir_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tsn.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8f0a28d86b665bc913b1760dc378e8e4bf4146d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tsn.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_tsn_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tso.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f1a87faa18a1af4729eca19d50b6a86bda83771
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_tso.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_tso_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_wol.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e0f6a629eea7632d7879943813fbd8964de5b8f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_wol.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_wol_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_xho.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3510a4d82e5975dde9272cd83e856780d7a3766
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_xho.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_xho_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_yor.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..526e24ef92ecfbada52046a0d72e878ef939e86c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_yor.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_zul.yaml b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7472e5213b9a5a016818e89eaf901611423131b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_4/belebele_zul.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: '{{flores_passage}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{mc_answer1}}
+
+  B) {{mc_answer2}}
+
+  C) {{mc_answer3}}
+
+  D) {{mc_answer4}}
+
+  Please provide the correct answer from the choices given:'
+include: belebele
+task: belebele_zul_prompt_4
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele
new file mode 100644
index 0000000000000000000000000000000000000000..0d85bf5172c2e8b9408448196191f3b7d40367a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele
@@ -0,0 +1,23 @@
+tag:
+    - belebele_tasks
+    - belebele_prompt_5
+    - RC_tasks
+dataset_path: facebook/belebele
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['1', '2', '3', '4'].index(correct_answer_num)}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_afr.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..01a724719757a2655800615e982a1ff1272dc438
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_afr.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_afr_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_amh.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f707d7c38153cd8304f7f02e331dc00858cb59c3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_amh.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_amh_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ary.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cf68405132141da73c1c4c0085bffa47c6aab41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ary.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_ary_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_arz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c0314a96d451eb0cbbe04ef2036fba63d7927f5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_arz.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_arz_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_bam.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..704c41a5ec8a68081ac33ea39b72f962e75e130f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_bam.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_bam_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_eng.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62617bf1e3266c46897623097836dbc3337add03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_eng.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_eng_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fra.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05131046414169281a6bdb6046cbfef3f461939f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fra.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_fra_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fuv.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..35103b5c19223a0a11e6a595b3f28fa3635455e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_fuv.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_fuv_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_gaz.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3822a5886fc0a179e95140208ba7751306c09619
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_gaz.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_gaz_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_hau.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a0a53114456c6223050d8a444052e2a15b3e2aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_hau.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ibo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5a8e29bcd708791b528111da1b3301586825561
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ibo.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kea.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45fb47ad9854a75127428774a10dbd6eb12d83a9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kea.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_kea_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bd9a07b8853165e8b5022a2d110cd450f2708ce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_kin.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lin.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff6493b711b0126ed2b32ad0d7d7f668c5c71482
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lin.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_lin_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lug.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b64c68ba1b3a8aa1e026cb191dcf96873285d40
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_lug.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_lug_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_luo.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f81859aae5913c79df2bb245fe48016dba0920a6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_luo.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_luo_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_nya.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c957760af620a7bf10726fb9611baa5758d1a03d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_nya.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_nya_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_plt.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..baad68ab37cf0a5c5d34f9c16a0fdfaa473e5968
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_plt.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_plt_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_por.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13b4e63948d0bc6ba9c886490a8585d2339bd357
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_por.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_por_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sna.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd4fc080074beb21801196d5df19ea59e6928b4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sna.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_sna_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_som.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3dfa40665cc2874333872503f880c4c373c03b52
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_som.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_som_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sot.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c78c862a08e3e6e6968d9e3e039a6cd67c99e978
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_sot.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_sot_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ssw.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2e8b96f93a43c64e36d0f2b8199524344511b70
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_ssw.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_ssw_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_swa.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a44af344142293043db80d9f55140569b7fdebf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_swa.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tir.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ef9af2ade11b49af64195a00811be7bf69b34d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tir.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_tir_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tsn.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0de5669b2a6031c7a5960bc174951d99cdc02502
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tsn.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_tsn_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tso.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92def0f429f0c7517c8904a8a2ae86cc2f534653
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_tso.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_tso_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_wol.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10192b8a69a44faf3b155a16287d3da15b0c4c5f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_wol.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_wol_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_xho.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ea12584e13ae4b4e19ac5b9c1fcc3b54ae0f950
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_xho.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_xho_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_yor.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c69e05cee5d26e91892e9303ad09f06856c254d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_yor.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_zul.yaml b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3c6905f9803bba171b95b48c540f730d16158bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/prompt_5/belebele_zul.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: 'Read the passage: {{flores_passage}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{mc_answer1}}
+
+  B. {{mc_answer2}}
+
+  C. {{mc_answer3}}
+
+  D. {{mc_answer4}}
+
+  Please choose the correct option from the above list:'
+include: belebele
+task: belebele_zul_prompt_5
diff --git a/lm_eval/tasks/afrobench/belebele/utils.py b/lm_eval/tasks/afrobench/belebele/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7654a6cfe7974b352446e8c71c5740fb9e45f9f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/belebele/utils.py
@@ -0,0 +1,155 @@
+import argparse
+import os
+
+import yaml
+
+
+def prompt_func(mode, lang):
+    """Return the belebele ``doc_to_text`` template stored under ``mode``; ``lang`` is unused."""
+    return {
+        "prompt_1": "P: {{flores_passage}}\nQ: {{question.strip()}}\nA: {{mc_answer1}}\nB: {{mc_answer2}}\nC: {{mc_answer3}}\nD: {{mc_answer4}}\nPlease choose the correct answer from the options above:",
+        "prompt_2": "Passage: {{flores_passage}}\nQuestion: {{question.strip()}}\n1: {{mc_answer1}}\n2: {{mc_answer2}}\n3: {{mc_answer3}}\n4: {{mc_answer4}}\nPlease select the correct answer from the given choices:",
+        "prompt_3": "Context: {{flores_passage}}\nQuery: {{question.strip()}}\nOption A: {{mc_answer1}}\nOption B: {{mc_answer2}}\nOption C: {{mc_answer3}}\nOption D: {{mc_answer4}}\nPlease indicate the correct option from the list above:",
+        "prompt_4": "{{flores_passage}}\nBased on the above passage, answer the following question:\n{{question.strip()}}\nChoices:\nA) {{mc_answer1}}\nB) {{mc_answer2}}\nC) {{mc_answer3}}\nD) {{mc_answer4}}\nPlease provide the correct answer from the choices given:",
+        "prompt_5": "Read the passage: {{flores_passage}}\nThen answer the question: {{question.strip()}}\nOptions:\nA. {{mc_answer1}}\nB. {{mc_answer2}}\nC. {{mc_answer3}}\nD. {{mc_answer4}}\nPlease choose the correct option from the above list:",
+    }[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Write one belebele task yaml per language into ``<output_dir>/<mode>/``.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant key passed to ``prompt_func``.
+    :raises FileExistsError: If ``overwrite`` is false and a target file exists.
+    """
+    # Task language code -> language name.
+    languages = {
+        "afr": "Afrikaans",
+        "amh": "Amharic",
+        "ary": "Moroccan Arabic",
+        "arz": "Egyptian Arabic",
+        "bam": "Bambara",
+        "eng": "English",
+        "fra": "French",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "lin": "Lingala",
+        "por": "Portuguese",
+        "sna": "Shona",
+        "swa": "Swahili",
+        "tir": "Tigrinya",
+        "tso": "Tsonga",
+        "tsn": "Tswana",
+        "wol": "Wolof",
+        "xho": "Xhosa",
+        "yor": "Yoruba",
+        "zul": "Zulu",
+        "ssw": "Swati",
+        "sot": "Southern Sotho",
+        "som": "Somali",
+        "plt": "Plateau Malagasy",
+        "nya": "Nyanja",
+        "luo": "Luo",
+        "lug": "Luganda",
+        "kin": "Kinyarwanda",
+        "kea": "Kabuverdianu",
+        "gaz": "Oromo",
+        "fuv": "Nigerian Fulfulde",
+    }
+
+    # Task language code -> ``dataset_name`` written into the yaml.
+    lang_2_dataset_lang_code = {
+        "afr": "afr_Latn",
+        "amh": "amh_Ethi",
+        "ary": "ary_Arab",
+        "arz": "arz_Arab",
+        "bam": "bam_Latn",
+        "eng": "eng_Latn",
+        "fra": "fra_Latn",
+        "hau": "hau_Latn",
+        "ibo": "ibo_Latn",
+        "lin": "lin_Latn",
+        "por": "por_Latn",
+        "sna": "sna_Latn",
+        "swa": "swh_Latn",
+        "tir": "tir_Ethi",
+        "tso": "tso_Latn",
+        "tsn": "tsn_Latn",
+        "wol": "wol_Latn",
+        "xho": "xho_Latn",
+        "yor": "yor_Latn",
+        "zul": "zul_Latn",
+        "ssw": "ssw_Latn",
+        "sot": "sot_Latn",
+        "som": "som_Latn",
+        "plt": "plt_Latn",
+        "nya": "nya_Latn",
+        "luo": "luo_Latn",
+        "lug": "lug_Latn",
+        "kin": "kin_Latn",
+        "kea": "kea_Latn",
+        "gaz": "gaz_Latn",
+        "fuv": "fuv_Latn",
+    }
+
+    # "x" (exclusive create) makes open() raise FileExistsError on a clash.
+    open_mode = "w" if overwrite else "x"
+    already_present = []
+
+    for lang, lang_name in languages.items():
+        file_name = f"belebele_{lang}.yaml"
+        yaml_details = {
+            "include": "belebele",
+            "task": f"belebele_{lang}_{mode}",
+            "dataset_name": lang_2_dataset_lang_code[lang],
+            "doc_to_text": prompt_func(mode, lang_name),
+        }
+        try:
+            os.makedirs(os.path.join(output_dir, mode), exist_ok=True)
+            with open(
+                f"{output_dir}/{mode}/{file_name}",
+                open_mode,
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            # Record the clash and keep going so every conflict is reported at once.
+            already_present.append(file_name)
+
+    if already_present:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(already_present)}"
+        )
+
+
+def main() -> None:
+    """Parse ``--overwrite``, ``--output-dir`` and ``--mode``, then call ``gen_lang_yamls``."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # off unless the flag is given; a True default would make it a no-op
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_5",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/flores/README.md b/lm_eval/tasks/afrobench/flores/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ccf433a9f884576ef412148ea67e1a07c86bea30
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/README.md
@@ -0,0 +1,31 @@
+# FLORES
+
+## Paper
+Title: `The FLORES-200 Evaluation Benchmark for Low-Resource and Multilingual Machine Translation`
+
+Paper Link: https://arxiv.org/abs/2207.04672
+
+HomePage: https://huggingface.co/datasets/facebook/flores
+
+### Citation
+
+```
+@article{nllb2022,
+  author    = {NLLB Team, Marta R. Costa-jussà, James Cross, Onur Çelebi, Maha Elbayad, Kenneth Heafield, Kevin Heffernan, Elahe Kalbassi,  Janice Lam, Daniel Licht, Jean Maillard, Anna Sun, Skyler Wang, Guillaume Wenzek, Al Youngblood, Bapi Akula, Loic Barrault, Gabriel Mejia Gonzalez, Prangthip Hansanti, John Hoffman, Semarley Jarrett, Kaushik Ram Sadagopan, Dirk Rowe, Shannon Spruit, Chau Tran, Pierre Andrews, Necip Fazil Ayan, Shruti Bhosale, Sergey Edunov, Angela Fan, Cynthia Gao, Vedanuj Goswami, Francisco Guzmán, Philipp Koehn, Alexandre Mourachko, Christophe Ropers, Safiyyah Saleem, Holger Schwenk, Jeff Wang},
+  title     = {No Language Left Behind: Scaling Human-Centered Machine Translation},
+  year      = {2022}
+}
+
+@inproceedings{,
+  title={The FLORES-101  Evaluation Benchmark for Low-Resource and Multilingual Machine Translation},
+  author={Goyal, Naman and Gao, Cynthia and Chaudhary, Vishrav and Chen, Peng-Jen and Wenzek, Guillaume and Ju, Da and Krishnan, Sanjana and Ranzato, Marc'Aurelio and Guzm\'{a}n, Francisco and Fan, Angela},
+  year={2021}
+}
+
+@inproceedings{,
+  title={Two New Evaluation Datasets for Low-Resource Machine Translation: Nepali-English and Sinhala-English},
+  author={Guzm\'{a}n, Francisco and Chen, Peng-Jen and Ott, Myle and Pino, Juan and Lample, Guillaume and Koehn, Philipp and Chaudhary, Vishrav and Ranzato, Marc'Aurelio},
+  journal={arXiv preprint arXiv:1902.01382},
+  year={2019}
+}
+```
diff --git a/lm_eval/tasks/afrobench/flores/flores.yaml b/lm_eval/tasks/afrobench/flores/flores.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09b6e39274a4a686d20f742927b9a2740c2ef59f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/flores.yaml
@@ -0,0 +1,14 @@
+group: african_flores  # group over the flores task names/tags listed under `task`
+task:
+  - flores_eng-afr_prompt_1
+  - flores_eng-afr_prompt_2
+  - flores_eng-afr_prompt_3
+  - flores_afr-eng_prompt_1
+  - flores_afr-eng_prompt_2
+  - flores_afr-eng_prompt_3
+aggregate_metric_list:
+  - metric: acc  # NOTE(review): the african-english prompt_1 template reports bleu/chrf, not acc — confirm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/flores/gen_utils.py b/lm_eval/tasks/afrobench/flores/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..37e22e13d6b024976e9198df78dfa7ae81845e8a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/gen_utils.py
@@ -0,0 +1,202 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # plain value holder; not referenced elsewhere in this script
+    def __init__(self, value):
+        self.value = value  # stored unchanged
+
+
+def prompt_func(mode, lang, lang_dict):
+    """Return the flores prompt for ``mode``; ``*_reverse`` keys translate English -> ``lang``."""
+    name = lang_dict[lang]
+    source = f"{{{{sentence_{lang}}}}}"  # double-brace placeholder for the ``lang`` column
+    english = "{{sentence_eng_Latn}}"
+    expert = "You are a translation expert. Translate the following"
+    linguist = f"As a {name} and English linguist, translate the following"
+    # Tails shared by the instruction-style prompts (spacing is significant).
+    to_english = f"\n{name}: {source}\nEnglish: "
+    from_english = f"\nEnglish: {english} \n{name}: "
+    prompt_map = {
+        "prompt_1": f"{name}: {source} \nEnglish: ",
+        "prompt_1_reverse": f"English: {english} \n{name}: ",
+        "prompt_2": f"{expert} {name} sentences to English {to_english}",
+        "prompt_2_reverse": f"{expert} English sentences to {name} {from_english}",
+        "prompt_3": f"{linguist} {name} sentences to English {to_english}",
+        "prompt_3_reverse": f"{linguist} English sentences to {name} {from_english}",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse: bool) -> None:
+    """
+    Write one flores translation-task yaml per language.
+
+    Files go to ``<output_dir>/<mode>/english-african`` when ``reverse`` is
+    truthy (English -> language) and ``<output_dir>/<mode>/african-english``
+    otherwise (language -> English).
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant key; ``f"{mode}_reverse"`` is used when reversing.
+    :param reverse: Truthy to translate from English into each language.
+    :raises FileExistsError: After the loop, if ``overwrite`` is false and at
+        least one target file already existed.
+    """
+    # Dataset language code -> language name inserted into the prompt.
+    languages = {
+        "ace_Latn": "Acehnese (Latin script)",
+        "ace_Arab": "Acehnese (Arabic script)",
+        "acq_Arab": "Ta’izzi-Adeni Arabic",
+        "aeb_Arab": "Tunisian Arabic",
+        "afr_Latn": "Afrikaans",
+        "aka_Latn": "Akan",
+        "amh_Ethi": "Amharic",
+        "ary_Arab": "Moroccan Arabic",
+        "arz_Arab": "Egyptian Arabic",
+        "bam_Latn": "Bambara",
+        "ban_Latn": "Balinese",
+        "bem_Latn": "Bemba",
+        "cjk_Latn": "Chokwe",
+        "dik_Latn": "Southwestern Dinka",
+        "dyu_Latn": "Dyula",
+        "ewe_Latn": "Ewe",
+        "fon_Latn": "Fon",
+        "fra_Latn": "French",
+        "fuv_Latn": "Nigerian Fulfulde",
+        "hau_Latn": "Hausa",
+        "ibo_Latn": "Igbo",
+        "kab_Latn": "Kabyle",
+        "kam_Latn": "Kamba",
+        "knc_Arab": "Central Kanuri (Arabic script)",
+        "knc_Latn": "Central Kanuri (Latin script)",
+        "kbp_Latn": "Kabiyè",
+        "kea_Latn": "Kabuverdianu",
+        "kik_Latn": "Kikuyu",
+        "kin_Latn": "Kinyarwanda",
+        "kmb_Latn": "Kimbundu",
+        "kon_Latn": "Kikongo",
+        "lin_Latn": "Lingala",
+        "lua_Latn": "Luba-Kasai",
+        "lug_Latn": "Luganda",
+        "luo_Latn": "Luo",
+        "plt_Latn": "Plateau Malagasy",
+        "mos_Latn": "Mossi",
+        "nso_Latn": "Northern Sotho",
+        "nus_Latn": "Nuer",
+        "nya_Latn": "Nyanja",
+        "gaz_Latn": "Oromo",
+        "run_Latn": "Rundi",
+        "sag_Latn": "Sango",
+        "sna_Latn": "Shona",
+        "som_Latn": "Somali",
+        "sot_Latn": "Southern Sotho",
+        "ssw_Latn": "Swati",
+        "sun_Latn": "Sundanese",
+        "swh_Latn": "Swahili",
+        "tir_Ethi": "Tigrinya",
+        "taq_Latn": "Tamasheq",
+        "taq_Tfng": "Tamasheq (Tifinagh script)",
+        "tsn_Latn": "Setswana",
+        "tso_Latn": "Tsonga",
+        "tum_Latn": "Tumbuka",
+        "twi_Latn": "Twi",
+        "tzm_Tfng": "Central Atlas Tamazight",
+        "umb_Latn": "Umbundu",
+        "wol_Latn": "Wolof",
+        "xho_Latn": "Xhosa",
+        "yor_Latn": "Yoruba",
+        "zul_Latn": "Zulu",
+    }
+
+    # Settings that depend only on the direction, not on the language.
+    if reverse:
+        subdir = "english-african"
+        prompt_key = f"{mode}_reverse"
+    else:
+        subdir = "african-english"
+        prompt_key = mode
+
+    # "x" (exclusive create) makes open() raise FileExistsError on a clash.
+    open_mode = "w" if overwrite else "x"
+    already_present = []
+
+    for lang in languages:
+        if reverse:
+            pair = f"eng_Latn-{lang}"
+            target_column = f"sentence_{lang}"
+        else:
+            pair = f"{lang}-eng_Latn"
+            target_column = "sentence_eng_Latn"
+        file_name = f"flores_{pair}.yaml"
+        yaml_details = {
+            "include": "flores",
+            "task": f"flores_{pair}_{mode}",
+            "dataset_name": pair,
+            "doc_to_target": target_column,
+            "doc_to_text": prompt_func(prompt_key, lang, languages),
+        }
+        try:
+            os.makedirs(f"{output_dir}/{mode}/{subdir}", exist_ok=True)
+            with open(
+                f"{output_dir}/{mode}/{subdir}/{file_name}",
+                open_mode,
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    yaml_details,
+                    f,
+                    allow_unicode=True,
+                )
+        except FileExistsError:
+            # Record the clash and keep going so every conflict is reported at once.
+            already_present.append(file_name)
+
+    if already_present:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(already_present)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate flores yaml files; pass ``--reverse False`` for language -> English."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # off unless the flag is given; a True default would make it a no-op
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3"],
+        help="Prompt number",
+    )
+    parser.add_argument(
+        "--reverse",
+        default=True,
+        type=lambda text: {"true": True, "false": False}.get(text.lower(), text),
+        choices=[True, False],  # checked after `type`; unrecognised text stays a str and is rejected
+        help="Reverse the translation direction",
+    )
+    args = parser.parse_args()
+    gen_lang_yamls(
+        output_dir=args.output_dir,
+        overwrite=args.overwrite,
+        mode=args.mode,
+        reverse=args.reverse,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores
new file mode 100644
index 0000000000000000000000000000000000000000..c25cf195cd032014435335eadf13e102f47598f9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_afr-eng
+- flores_afr-eng_prompt_1
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c558249774e6182755078db81154e3d88db656c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ace_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Acehnese (Arabic script): {{sentence_ace_Arab}} \nEnglish: "
+include: flores
+task: flores_ace_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f0a6ee27cfc218e297ccaf05ab7d0bcef5da57b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ace_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ace_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Acehnese (Latin script): {{sentence_ace_Latn}} \nEnglish: "
+include: flores
+task: flores_ace_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_acq_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_acq_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3634e7a66c7b11be9a0450f6f5ab953707897eb1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_acq_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: acq_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Ta’izzi-Adeni Arabic: {{sentence_acq_Arab}} \nEnglish: "
+include: flores
+task: flores_acq_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aeb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aeb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53636d7c01d92e87b80da4bd6656b7740d0f11a6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aeb_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: aeb_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tunisian Arabic: {{sentence_aeb_Arab}} \nEnglish: "
+include: flores
+task: flores_aeb_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2ac14a0c04b362f925e578a30a9eb615a6bc1fed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: afr_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Afrikaans: {{sentence_afr_Latn}} \nEnglish: "
+include: flores
+task: flores_afr_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aka_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aka_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3caf192676374f70a707c77d84bb6eac1deacb5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_aka_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: aka_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Akan: {{sentence_aka_Latn}} \nEnglish: "
+include: flores
+task: flores_aka_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c0be0828a5110df25911b503c0db29b2fe6dbb3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Amharic: {{sentence_amh_Ethi}} \nEnglish: "
+include: flores
+task: flores_amh_Ethi-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ary_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ary_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bcd452d6e86cf669bcbc97b7216d72dbeb37ffd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ary_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ary_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Moroccan Arabic: {{sentence_ary_Arab}} \nEnglish: "
+include: flores
+task: flores_ary_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_arz_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_arz_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72552bab1ce9b04feb4619a28e4a424b1bdb99d3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_arz_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: arz_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Egyptian Arabic: {{sentence_arz_Arab}} \nEnglish: "
+include: flores
+task: flores_arz_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14e8a1c74fb7f9bcdd47703121bc127420d5cf3a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bam_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Bambara: {{sentence_bam_Latn}} \nEnglish: "
+include: flores
+task: flores_bam_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ban_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ban_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54a582446ec44263f7020676a7e5fa3eee88e780
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ban_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ban_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Balinese: {{sentence_ban_Latn}} \nEnglish: "
+include: flores
+task: flores_ban_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53bbe221d7ec3a98f4eedeb9fdcfd49a2d872198
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bem_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Bemba: {{sentence_bem_Latn}} \nEnglish: "
+include: flores
+task: flores_bem_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_cjk_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_cjk_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63994d04d0dfc9ca7d8418835dcd47abf79d5031
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_cjk_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: cjk_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Chokwe: {{sentence_cjk_Latn}} \nEnglish: "
+include: flores
+task: flores_cjk_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd9022b53f0325804b07bf5fb8c222a37c5eccde
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dik_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: dik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Southwestern Dinka: {{sentence_dik_Latn}} \nEnglish: "
+include: flores
+task: flores_dik_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dyu_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dyu_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e25e23d89d7090ec09c68af8c83705dd6a43d7d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_dyu_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: dyu_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Dyula: {{sentence_dyu_Latn}} \nEnglish: "
+include: flores
+task: flores_dyu_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fffa31fcd93a02581b8e70e20d2b2fd84803365c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Ewe: {{sentence_ewe_Latn}} \nEnglish: "
+include: flores
+task: flores_ewe_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..70c9bfbe0f59666124b03c55432d4981472da9d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fon_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Fon: {{sentence_fon_Latn}} \nEnglish: "
+include: flores
+task: flores_fon_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c515a8f6adff914e6c237a1d637d8a589bc976f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "French: {{sentence_fra_Latn}} \nEnglish: "
+include: flores
+task: flores_fra_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fuv_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fuv_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a162567753f9b6ee34d52f5ac06233b54973eac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_fuv_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fuv_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Nigerian Fulfulde: {{sentence_fuv_Latn}} \nEnglish: "
+include: flores
+task: flores_fuv_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_gaz_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_gaz_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec443459d6b9ac698106c6dc2c500d2b897557f1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_gaz_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: gaz_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Oromo: {{sentence_gaz_Latn}} \nEnglish: "
+include: flores
+task: flores_gaz_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d518b5122fa8ebf09188fd1eca8f6f5e2e23983
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Hausa: {{sentence_hau_Latn}} \nEnglish: "
+include: flores
+task: flores_hau_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c121ae73d95a5acffa72c27af04c9c1b16a7b43
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Igbo: {{sentence_ibo_Latn}} \nEnglish: "
+include: flores
+task: flores_ibo_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kab_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kab_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..42c625488a60ed854f9769636ddc208d8d7d2e0c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kab_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kab_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kabyle: {{sentence_kab_Latn}} \nEnglish: "
+include: flores
+task: flores_kab_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7d10cc570d7c338e7928a36be4a3026cffaf159
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kam_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kamba: {{sentence_kam_Latn}} \nEnglish: "
+include: flores
+task: flores_kam_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kbp_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kbp_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43cc5e32a272d14bc8549d28f6f8784ab1e968dd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kbp_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kbp_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kabiyè: {{sentence_kbp_Latn}} \nEnglish: "
+include: flores
+task: flores_kbp_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kea_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kea_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c894681ef571f72d6edfa953a9d0aeaafb5dcc9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kea_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kea_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kabuverdianu: {{sentence_kea_Latn}} \nEnglish: "
+include: flores
+task: flores_kea_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dbdff8e215e247fbc4b0061154f3c66903aad80c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kik_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kikuyu: {{sentence_kik_Latn}} \nEnglish: "
+include: flores
+task: flores_kik_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b11194a98eacd84551995aa1506993a9c8a52bf6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kinyarwanda: {{sentence_kin_Latn}} \nEnglish: "
+include: flores
+task: flores_kin_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kmb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kmb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..258b847d28294196b7c4d7455320e24d0ad2a59a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kmb_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kmb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kimbundu: {{sentence_kmb_Latn}} \nEnglish: "
+include: flores
+task: flores_kmb_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..642dfc6f891572f65037299b8f3a8381f51f0421
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: knc_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Central Kanuri (Arabic script): {{sentence_knc_Arab}} \nEnglish: "
+include: flores
+task: flores_knc_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f904da712bda335eab0ccc0e7036b8190caf93e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_knc_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: knc_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Central Kanuri (Latin script): {{sentence_knc_Latn}} \nEnglish: "
+include: flores
+task: flores_knc_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54fce1f8da44e9b2c889e71e8a8d9f5eea4b3ef7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_kon_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kikongo: {{sentence_kon_Latn}} \nEnglish: "
+include: flores
+task: flores_kon_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41494a7263855a44fd217ac6d7cee38e714a8597
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lin_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Lingala: {{sentence_lin_Latn}} \nEnglish: "
+include: flores
+task: flores_lin_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lua_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lua_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d54350a45f7838492555c4268e552dc609f0e12
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lua_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lua_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Luba-Kasai: {{sentence_lua_Latn}} \nEnglish: "
+include: flores
+task: flores_lua_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lug_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lug_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..35d8e31b1331a8e478bc6960c262a8d5eb5630df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_lug_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: lug_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Luganda: {{sentence_lug_Latn}} \nEnglish: "
+include: flores
+task: flores_lug_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_luo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_luo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7a22ec7db9d0cf3d3497ab0367c32a2aef602513
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_luo_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: luo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Luo: {{sentence_luo_Latn}} \nEnglish: "
+include: flores
+task: flores_luo_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_mos_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_mos_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a4c1009c46290faafcb15930cb98217612d5c14
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_mos_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: mos_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Mossi: {{sentence_mos_Latn}} \nEnglish: "
+include: flores
+task: flores_mos_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2409753c8a86d873fc33b68cbaba493b154fd947
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Northern Sotho: {{sentence_nso_Latn}} \nEnglish: "
+include: flores
+task: flores_nso_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nus_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nus_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f77380957e4c1b61cd9a277e8e2d831fa7d9a0da
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nus_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nus_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Nuer: {{sentence_nus_Latn}} \nEnglish: "
+include: flores
+task: flores_nus_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..def5625dd7a4b6dd4288a796681fe6fbfc40c6ac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nya_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Nyanja: {{sentence_nya_Latn}} \nEnglish: "
+include: flores
+task: flores_nya_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_plt_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_plt_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f877a307254dfe00e645ea544bc1a7fb64411162
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_plt_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: plt_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Plateau Malagasy: {{sentence_plt_Latn}} \nEnglish: "
+include: flores
+task: flores_plt_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_run_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_run_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e00eb85718ffff0eb4fd84a1ce50fc4ff92c9988
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_run_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: run_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Rundi: {{sentence_run_Latn}} \nEnglish: "
+include: flores
+task: flores_run_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sag_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sag_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7f43c6b6cf91d52311d4f0981086afcd833d8b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sag_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sag_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Sango: {{sentence_sag_Latn}} \nEnglish: "
+include: flores
+task: flores_sag_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d63b4c6baf598d665509cadcaf1c1613f7f2c77d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sna_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Shona: {{sentence_sna_Latn}} \nEnglish: "
+include: flores
+task: flores_sna_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f625c559f3c1dd1b0490d42c29515dfeaef28d68
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_som_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: som_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Somali: {{sentence_som_Latn}} \nEnglish: "
+include: flores
+task: flores_som_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sot_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sot_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..11653e6059e51d71b48e722abd1c519ddd956d00
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sot_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sot_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Southern Sotho: {{sentence_sot_Latn}} \nEnglish: "
+include: flores
+task: flores_sot_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe3ceb9a874c035a06f3c9fdc46e254a544bc563
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ssw_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Swati: {{sentence_ssw_Latn}} \nEnglish: "
+include: flores
+task: flores_ssw_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sun_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sun_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3f605f9400dacb782a0066b1f6559aaba6ed270
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_sun_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sun_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Sundanese: {{sentence_sun_Latn}} \nEnglish: "
+include: flores
+task: flores_sun_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_swh_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_swh_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7651ac3159f59d94886dc97a8e854fb19e184115
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_swh_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swh_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Swahili: {{sentence_swh_Latn}} \nEnglish: "
+include: flores
+task: flores_swh_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3fca39004e66ae6c74858cea59e930527a41eff
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: taq_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tamasheq: {{sentence_taq_Latn}} \nEnglish: "
+include: flores
+task: flores_taq_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7152867ee6e32951f630f2d24d615ec248090fde
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_taq_Tfng-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: taq_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tamasheq (Tifinagh script): {{sentence_taq_Tfng}} \nEnglish: "
+include: flores
+task: flores_taq_Tfng-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc13ae0413578e722f0f7c7e1c724e565b7a10c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tir_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tigrinya: {{sentence_tir_Ethi}} \nEnglish: "
+include: flores
+task: flores_tir_Ethi-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a6c4e1c820c15ff4e1b3f2d8db43c102a4b206e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tsn_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Setswana: {{sentence_tsn_Latn}} \nEnglish: "
+include: flores
+task: flores_tsn_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d473ab03b198ff0691265f278a6aec6e688e967
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tso_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tsonga: {{sentence_tso_Latn}} \nEnglish: "
+include: flores
+task: flores_tso_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tum_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tum_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c491f25b514977bd8554a0d53212525b028a6e41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tum_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tum_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tumbuka: {{sentence_tum_Latn}} \nEnglish: "
+include: flores
+task: flores_tum_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_twi_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_twi_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d8ad29e375918e52cbd3337ac6965a461c7f7fc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_twi_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: twi_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Twi: {{sentence_twi_Latn}} \nEnglish: "
+include: flores
+task: flores_twi_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tzm_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tzm_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba4624651a68c219eefd7c9157f3bce58d751c48
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_tzm_Tfng-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Central Atlas Tamazight: {{sentence_tzm_Tfng}} \nEnglish: "
+include: flores
+task: flores_tzm_Tfng-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_umb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_umb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0758003ad3bb766fad0fa545d39ba976d4443f26
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_umb_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: umb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Umbundu: {{sentence_umb_Latn}} \nEnglish: "
+include: flores
+task: flores_umb_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..914e6c128220583cb7f2e064e0eed56948bbdfd9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Wolof: {{sentence_wol_Latn}} \nEnglish: "
+include: flores
+task: flores_wol_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc130fb0447aa7e617debe3cf1086f55ac96aec6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: xho_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Xhosa: {{sentence_xho_Latn}} \nEnglish: "
+include: flores
+task: flores_xho_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea0fbc4e65b9320f1d4bd701a98819c17b69ab0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Yoruba: {{sentence_yor_Latn}} \nEnglish: "
+include: flores
+task: flores_yor_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea070b30e7822d9e55135f3842766e68f94c1f2c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/african-english/flores_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Zulu: {{sentence_zul_Latn}} \nEnglish: "
+include: flores
+task: flores_zul_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores
new file mode 100644
index 0000000000000000000000000000000000000000..e6f4d051431159f4360115226ea58dec2487c0c2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_eng-afr
+- flores_eng-afr_prompt_1
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9da06483bc1e3f19e10636cdf1509ad899832ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Arab
+doc_to_target: sentence_ace_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nAcehnese (Arabic script): "
+include: flores
+task: flores_eng_Latn-ace_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2ed60660bd3f565484d16440ce9fb2d82f6a555
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ace_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Latn
+doc_to_target: sentence_ace_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nAcehnese (Latin script): "
+include: flores
+task: flores_eng_Latn-ace_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-acq_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-acq_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e61bb2472b427de012ce3d47122906df99f14089
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-acq_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-acq_Arab
+doc_to_target: sentence_acq_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nTa’izzi-Adeni Arabic: "
+include: flores
+task: flores_eng_Latn-acq_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aeb_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aeb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d59000626aa9ef7f6cfcd6bc6a315cbb25a90142
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aeb_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aeb_Arab
+doc_to_target: sentence_aeb_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nTunisian Arabic: "
+include: flores
+task: flores_eng_Latn-aeb_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b4c4d46b432d78f6e9947dbcd26868885560c53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nAfrikaans: "
+include: flores
+task: flores_eng_Latn-afr_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aka_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aka_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d66a637f75d19c72d3846819d249f9a67989e04c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-aka_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aka_Latn
+doc_to_target: sentence_aka_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nAkan: "
+include: flores
+task: flores_eng_Latn-aka_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e648d33270ae5944609c8ce50c2dd3e92bbfeb97
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "English: {{sentence_eng_Latn}} \nAmharic: "
+include: flores
+task: flores_eng_Latn-amh_Ethi_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ary_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ary_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54f9a2ad67ac390ba4cc4a7a6db6a1d2e5061a54
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ary_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ary_Arab
+doc_to_target: sentence_ary_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nMoroccan Arabic: "
+include: flores
+task: flores_eng_Latn-ary_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-arz_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-arz_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a42fa079b4501f6402eebd3241c26f14b1e5af6e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-arz_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-arz_Arab
+doc_to_target: sentence_arz_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nEgyptian Arabic: "
+include: flores
+task: flores_eng_Latn-arz_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c85b7db9394d3c309b3e5c5b196a0e5451c4d0b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bam_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bam_Latn
+doc_to_target: sentence_bam_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nBambara: "
+include: flores
+task: flores_eng_Latn-bam_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ban_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ban_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f43a4b71131da9cf555964b79a6258ce7f36c2ef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ban_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ban_Latn
+doc_to_target: sentence_ban_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nBalinese: "
+include: flores
+task: flores_eng_Latn-ban_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..252117ef888300c0dfaa64f2cacc55bf66292136
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nBemba: "
+include: flores
+task: flores_eng_Latn-bem_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-cjk_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-cjk_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb4e3566b8383d7bce70a88bd3b663fa984c5154
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-cjk_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-cjk_Latn
+doc_to_target: sentence_cjk_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nChokwe: "
+include: flores
+task: flores_eng_Latn-cjk_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36dea9d3a9dc3371a576315540f439af9e38b4e5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dik_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dik_Latn
+doc_to_target: sentence_dik_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSouthwestern Dinka: "
+include: flores
+task: flores_eng_Latn-dik_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dyu_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dyu_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c32be8ac93c7cecfbc171eb898232c7296cf6886
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-dyu_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dyu_Latn
+doc_to_target: sentence_dyu_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nDyula: "
+include: flores
+task: flores_eng_Latn-dyu_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a71b4556a077b260bfb340a3c0c289ae79ac88b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nEwe: "
+include: flores
+task: flores_eng_Latn-ewe_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1000e13ad1f6864f002c741b8074d06073cb3dc8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fon_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fon_Latn
+doc_to_target: sentence_fon_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nFon: "
+include: flores
+task: flores_eng_Latn-fon_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47b99a088c485bed46c51d1da0308ac569aaebd3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nFrench: "
+include: flores
+task: flores_eng_Latn-fra_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fuv_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fuv_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8855378737fa985bf35a840c78f81d34f8542305
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-fuv_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fuv_Latn
+doc_to_target: sentence_fuv_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNigerian Fulfulde: "
+include: flores
+task: flores_eng_Latn-fuv_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-gaz_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-gaz_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e124ae153091ed617f87388afb7d6c4c980d754
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-gaz_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-gaz_Latn
+doc_to_target: sentence_gaz_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nOromo: "
+include: flores
+task: flores_eng_Latn-gaz_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9aaf537f1d1c491ae3de996d2180c9b32002647
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nHausa: "
+include: flores
+task: flores_eng_Latn-hau_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ebf8f517c3e96d64716db741688161d520bd04a5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nIgbo: "
+include: flores
+task: flores_eng_Latn-ibo_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kab_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kab_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd22cb7de77e624bea297d7011aa18aab3408b10
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kab_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kab_Latn
+doc_to_target: sentence_kab_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKabyle: "
+include: flores
+task: flores_eng_Latn-kab_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..802ae7dca4e9b4165f2028cfce4be8c08c1b0edd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kam_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kam_Latn
+doc_to_target: sentence_kam_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKamba: "
+include: flores
+task: flores_eng_Latn-kam_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kbp_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kbp_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9cc1afd5e9af72a8a7f73a6789cb2dc0af1e9c39
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kbp_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kbp_Latn
+doc_to_target: sentence_kbp_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKabiyè: "
+include: flores
+task: flores_eng_Latn-kbp_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kea_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kea_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55d3e7767c8533eb9f0f94c37a33fdb628c2b27a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kea_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kea_Latn
+doc_to_target: sentence_kea_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKabuverdianu: "
+include: flores
+task: flores_eng_Latn-kea_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bee435fe5495364b08420772d1dfade8f9ac671d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kik_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kik_Latn
+doc_to_target: sentence_kik_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKikuyu: "
+include: flores
+task: flores_eng_Latn-kik_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28530541f153ad52724b5d0aa13eca176fa73c29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: flores
+task: flores_eng_Latn-kin_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kmb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kmb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5619209f346582b64e51b904288181fc18bc34d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kmb_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kmb_Latn
+doc_to_target: sentence_kmb_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKimbundu: "
+include: flores
+task: flores_eng_Latn-kmb_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fba5e257d7c5154ef3d89ace61d5f7fe2397ccc8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Arab
+doc_to_target: sentence_knc_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nCentral Kanuri (Arabic script): "
+include: flores
+task: flores_eng_Latn-knc_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1f84d5753ff5e03d9d4d6463beeb9a390d5c2eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-knc_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Latn
+doc_to_target: sentence_knc_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nCentral Kanuri (Latin script): "
+include: flores
+task: flores_eng_Latn-knc_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6d8ef32d897edfa3086e7b93c39efa41a907e75
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-kon_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kon_Latn
+doc_to_target: sentence_kon_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKikongo: "
+include: flores
+task: flores_eng_Latn-kon_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f998f3590b5ebdc35388a9a875a6358366684260
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lin_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lin_Latn
+doc_to_target: sentence_lin_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nLingala: "
+include: flores
+task: flores_eng_Latn-lin_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lua_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lua_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..246fc1354a71eadbe1ea6e058387859fd5c018c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lua_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lua_Latn
+doc_to_target: sentence_lua_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nLuba-Kasai: "
+include: flores
+task: flores_eng_Latn-lua_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lug_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lug_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3416989fdba2777eb13664a7db4409f091bb0b75
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-lug_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lug_Latn
+doc_to_target: sentence_lug_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nLuganda: "
+include: flores
+task: flores_eng_Latn-lug_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-luo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-luo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a56e1482037f8728945929f991a9217a9d91f05
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-luo_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-luo_Latn
+doc_to_target: sentence_luo_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nLuo: "
+include: flores
+task: flores_eng_Latn-luo_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-mos_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-mos_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..393862689847cd6d3c6701696f57bea6f190c564
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-mos_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-mos_Latn
+doc_to_target: sentence_mos_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nMossi: "
+include: flores
+task: flores_eng_Latn-mos_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86bd9c6bcdc72f750dd3ae245c992e754ec6b55b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: flores
+task: flores_eng_Latn-nso_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nus_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nus_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ac9148958f11e460266f4ff55aab4b44263074c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nus_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nus_Latn
+doc_to_target: sentence_nus_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNuer: "
+include: flores
+task: flores_eng_Latn-nus_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4e35d78e708d25ad57abf36bb3ef4230f1acd66
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNyanja: "
+include: flores
+task: flores_eng_Latn-nya_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-plt_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-plt_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e07ffcd257e91028807b37bec7c259f04fd3adb9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-plt_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-plt_Latn
+doc_to_target: sentence_plt_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nPlateau Malagasy: "
+include: flores
+task: flores_eng_Latn-plt_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-run_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-run_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cad3666bdb35072af569f2405ea106c487121d57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-run_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-run_Latn
+doc_to_target: sentence_run_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nRundi: "
+include: flores
+task: flores_eng_Latn-run_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sag_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sag_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9eaa3c8995add71ccf7052a621d92e76cd06861c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sag_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sag_Latn
+doc_to_target: sentence_sag_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSango: "
+include: flores
+task: flores_eng_Latn-sag_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16f70ba79f218b67b8e3efb730cde9d903ea38b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nShona: "
+include: flores
+task: flores_eng_Latn-sna_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b628b7a4eddf4a073c2b6a77c6ed295c0f9cca17
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-som_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSomali: "
+include: flores
+task: flores_eng_Latn-som_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sot_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sot_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62655dff56701879705027502b23986c3bd96f78
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sot_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sot_Latn
+doc_to_target: sentence_sot_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSouthern Sotho: "
+include: flores
+task: flores_eng_Latn-sot_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c247e565f839de423ac4aeecc79198189471d126
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSwati: "
+include: flores
+task: flores_eng_Latn-ssw_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sun_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sun_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee3c4a5712ea83c5ab676c949c77192ba4f84735
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-sun_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sun_Latn
+doc_to_target: sentence_sun_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSundanese: "
+include: flores
+task: flores_eng_Latn-sun_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-swh_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-swh_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b464c16601d0e7e385a6df32b83fcde41d24c91
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-swh_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-swh_Latn
+doc_to_target: sentence_swh_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSwahili: "
+include: flores
+task: flores_eng_Latn-swh_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc50d54faa83f621f08241f59baf6a14e4b6c674
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Latn
+doc_to_target: sentence_taq_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTamasheq: "
+include: flores
+task: flores_eng_Latn-taq_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8c0045338ad054dd48750ae88767f958f0b9e4f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-taq_Tfng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Tfng
+doc_to_target: sentence_taq_Tfng
+doc_to_text: "English: {{sentence_eng_Latn}} \nTamasheq (Tifinagh script): "
+include: flores
+task: flores_eng_Latn-taq_Tfng_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d3110696c8c1088d2ad2683d8d9d45b3415038d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "English: {{sentence_eng_Latn}} \nTigrinya: "
+include: flores
+task: flores_eng_Latn-tir_Ethi_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d782a2af5cde5ae3a006c205a0796ea1a15750d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSetswana: "
+include: flores
+task: flores_eng_Latn-tsn_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85bca5e9ffdf083cca344501fb818d7e0e60b732
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tso_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tso_Latn
+doc_to_target: sentence_tso_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTsonga: "
+include: flores
+task: flores_eng_Latn-tso_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tum_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tum_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9036f3b7a1d2c4fa91a1f4278c1019cdf2bc68a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tum_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tum_Latn
+doc_to_target: sentence_tum_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTumbuka: "
+include: flores
+task: flores_eng_Latn-tum_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-twi_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-twi_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9658615983d1c44bfd74d88def6db73a465ce96d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-twi_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-twi_Latn
+doc_to_target: sentence_twi_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTwi: "
+include: flores
+task: flores_eng_Latn-twi_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tzm_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tzm_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28728f412fdbe74d85d062a316eeb385b04b94a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-tzm_Tfng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tzm_Tfng
+doc_to_target: sentence_tzm_Tfng
+doc_to_text: "English: {{sentence_eng_Latn}} \nCentral Atlas Tamazight: "
+include: flores
+task: flores_eng_Latn-tzm_Tfng_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-umb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-umb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd95ac316ab006df8c4a52867ae3fdafafa36da2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-umb_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-umb_Latn
+doc_to_target: sentence_umb_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nUmbundu: "
+include: flores
+task: flores_eng_Latn-umb_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb6965245032f6821b4ca413d6ead9e892bdb407
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nWolof: "
+include: flores
+task: flores_eng_Latn-wol_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08480361c18c787ead563d02783982bd1ad8b8e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nXhosa: "
+include: flores
+task: flores_eng_Latn-xho_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d29e9a9c859134f25abdc46ca44a256d473415a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nYoruba: "
+include: flores
+task: flores_eng_Latn-yor_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62de546051295ff6b413870f9eee5e806151f4cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/english-african/flores_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: eng_Latn-zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nZulu: "
+include: flores
+task: flores_eng_Latn-zul_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/flores/prompt_1/flores b/lm_eval/tasks/afrobench/flores/prompt_1/flores
new file mode 100644
index 0000000000000000000000000000000000000000..74f9f33eb22662bec79709bd64d8d31f3fb8eae0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_1/flores
@@ -0,0 +1,24 @@
+tag:
+- flores_tasks
+- flores_afr-eng
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores
new file mode 100644
index 0000000000000000000000000000000000000000..e0fa69a2a441116ef15a4158cc366792d841f304
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_afr-eng
+- flores_afr-eng_prompt_2
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd54b6c84dc428721616791f698ce55f5064aef4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ace_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Acehnese (Arabic\
+  \ script) sentences to English \nAcehnese (Arabic script): {{sentence_ace_Arab}}\n\
+  English: "
+include: flores
+task: flores_ace_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0814b27f80b3ac85e70179a124708ed5f9c3ac4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ace_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ace_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Acehnese (Latin\
+  \ script) sentences to English \nAcehnese (Latin script): {{sentence_ace_Latn}}\n\
+  English: "
+include: flores
+task: flores_ace_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_acq_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_acq_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d1464b4965d566d628dcfa12583003a972af29aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_acq_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: acq_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Ta’izzi-Adeni\
+  \ Arabic sentences to English \nTa’izzi-Adeni Arabic: {{sentence_acq_Arab}}\nEnglish: "
+include: flores
+task: flores_acq_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aeb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aeb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2bbded5ff0d48bf2bbd582dbfa97cfa4d222b5c9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aeb_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aeb_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tunisian Arabic\
+  \ sentences to English \nTunisian Arabic: {{sentence_aeb_Arab}}\nEnglish: "
+include: flores
+task: flores_aeb_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b5847d8a997310bd191a3e0009d24abdedb6e4f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Afrikaans sentences\
+  \ to English \nAfrikaans: {{sentence_afr_Latn}}\nEnglish: "
+include: flores
+task: flores_afr_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aka_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aka_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f9493c5ea18493d4b0a2f2070135b5fb01c0692
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_aka_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aka_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Akan sentences\
+  \ to English \nAkan: {{sentence_aka_Latn}}\nEnglish: "
+include: flores
+task: flores_aka_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d615bfc3d6f9ed19dce78c024fdaa45a53fdac8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Amharic sentences\
+  \ to English \nAmharic: {{sentence_amh_Ethi}}\nEnglish: "
+include: flores
+task: flores_amh_Ethi-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ary_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ary_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..feecf4510ac45bf5d364cf1782942e388c2119eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ary_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ary_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Moroccan Arabic\
+  \ sentences to English \nMoroccan Arabic: {{sentence_ary_Arab}}\nEnglish: "
+include: flores
+task: flores_ary_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_arz_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_arz_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13f3e18b5d6ecd8d911741e4fe1d3ee7720f81df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_arz_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arz_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Egyptian Arabic\
+  \ sentences to English \nEgyptian Arabic: {{sentence_arz_Arab}}\nEnglish: "
+include: flores
+task: flores_arz_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a258d264b26a9c02be0ca900f058500b19f0c256
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bam_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Bambara sentences\
+  \ to English \nBambara: {{sentence_bam_Latn}}\nEnglish: "
+include: flores
+task: flores_bam_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ban_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ban_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c19cf00874080b980b18e811f61d31a332ca2a5b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ban_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ban_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Balinese sentences\
+  \ to English \nBalinese: {{sentence_ban_Latn}}\nEnglish: "
+include: flores
+task: flores_ban_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9500a3b37c033a026795f47cc74c6a7df94325c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Bemba sentences\
+  \ to English \nBemba: {{sentence_bem_Latn}}\nEnglish: "
+include: flores
+task: flores_bem_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_cjk_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_cjk_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58185199f7dcfcff5a9c5c4ba501ea7edaa4b26f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_cjk_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: cjk_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Chokwe sentences\
+  \ to English \nChokwe: {{sentence_cjk_Latn}}\nEnglish: "
+include: flores
+task: flores_cjk_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c9090a56c686036f07c63b3b92bafad068d811e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dik_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Southwestern Dinka\
+  \ sentences to English \nSouthwestern Dinka: {{sentence_dik_Latn}}\nEnglish: "
+include: flores
+task: flores_dik_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dyu_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dyu_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47187fb0817ddf7c04041f6b2b9ef358138dfdf8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_dyu_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dyu_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Dyula sentences\
+  \ to English \nDyula: {{sentence_dyu_Latn}}\nEnglish: "
+include: flores
+task: flores_dyu_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8838bc3a03742e9e4da5a387f995dcc94018b5d9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Ewe sentences\
+  \ to English \nEwe: {{sentence_ewe_Latn}}\nEnglish: "
+include: flores
+task: flores_ewe_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7874a87cecb89c402a0e0c1ffea473f4283cc58f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fon_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Fon sentences\
+  \ to English \nFon: {{sentence_fon_Latn}}\nEnglish: "
+include: flores
+task: flores_fon_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb84246ef4b7942f1ecac940e1f80d1664faef3e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following French sentences\
+  \ to English \nFrench: {{sentence_fra_Latn}}\nEnglish: "
+include: flores
+task: flores_fra_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fuv_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fuv_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0686706d533616d7c1809313c1f5bd302b3c1a45
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_fuv_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fuv_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Nigerian Fulfulde\
+  \ sentences to English \nNigerian Fulfulde: {{sentence_fuv_Latn}}\nEnglish: "
+include: flores
+task: flores_fuv_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_gaz_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_gaz_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0ba07a6112f9dc2b740a868a5df7673dfba7650
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_gaz_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: gaz_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Oromo sentences\
+  \ to English \nOromo: {{sentence_gaz_Latn}}\nEnglish: "
+include: flores
+task: flores_gaz_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85647455d58aabfb824508bae282e15f63ccaa18
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Hausa sentences\
+  \ to English \nHausa: {{sentence_hau_Latn}}\nEnglish: "
+include: flores
+task: flores_hau_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c401f1e75be4f83b89a714d40eaaa97ffe677e36
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Igbo sentences\
+  \ to English \nIgbo: {{sentence_ibo_Latn}}\nEnglish: "
+include: flores
+task: flores_ibo_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kab_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kab_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c82946b9143ce0706f9768bb078d6bdc6541ebbc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kab_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kab_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kabyle sentences\
+  \ to English \nKabyle: {{sentence_kab_Latn}}\nEnglish: "
+include: flores
+task: flores_kab_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8661bf6e5694498af0e29ca23e9e037ddd77adc9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kam_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kamba sentences\
+  \ to English \nKamba: {{sentence_kam_Latn}}\nEnglish: "
+include: flores
+task: flores_kam_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kbp_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kbp_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e20af3149dc1baad3b0edca477444a98ba078c9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kbp_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kbp_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kabiyè sentences\
+  \ to English \nKabiyè: {{sentence_kbp_Latn}}\nEnglish: "
+include: flores
+task: flores_kbp_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kea_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kea_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d078c293ab75a286b9ac717f9322d8fd92d0b585
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kea_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kea_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kabuverdianu sentences\
+  \ to English \nKabuverdianu: {{sentence_kea_Latn}}\nEnglish: "
+include: flores
+task: flores_kea_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..346dcb98be91749b59747194a47cefe40ca3eef4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kik_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kikuyu sentences\
+  \ to English \nKikuyu: {{sentence_kik_Latn}}\nEnglish: "
+include: flores
+task: flores_kik_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7210e7e6b21c0458983917f659518ba666d45d0a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kinyarwanda sentences\
+  \ to English \nKinyarwanda: {{sentence_kin_Latn}}\nEnglish: "
+include: flores
+task: flores_kin_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kmb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kmb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b3dc8d5ac6d52ccfb84f6b8fc416ab61eaa7c007
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kmb_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kmb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kimbundu sentences\
+  \ to English \nKimbundu: {{sentence_kmb_Latn}}\nEnglish: "
+include: flores
+task: flores_kmb_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..37d5d624ff0f55b15649c5468f215b069efd4bcb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: knc_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Central Kanuri\
+  \ (Arabic script) sentences to English \nCentral Kanuri (Arabic script): {{sentence_knc_Arab}}\n\
+  English: "
+include: flores
+task: flores_knc_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d60408cd39dc640f7db077186340de97aa4702f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_knc_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: knc_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Central Kanuri\
+  \ (Latin script) sentences to English \nCentral Kanuri (Latin script): {{sentence_knc_Latn}}\n\
+  English: "
+include: flores
+task: flores_knc_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63b3539cf9f361ba4b0786596bf43d934c85478c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_kon_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kikongo sentences\
+  \ to English \nKikongo: {{sentence_kon_Latn}}\nEnglish: "
+include: flores
+task: flores_kon_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82543c70d5b26ed5b79288c0775a6d21216bfbe8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Lingala sentences\
+  \ to English \nLingala: {{sentence_lin_Latn}}\nEnglish: "
+include: flores
+task: flores_lin_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lua_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lua_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..af0796cc47138b67137e58ba44835ffa9ebf8596
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lua_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lua_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Luba-Kasai sentences\
+  \ to English \nLuba-Kasai: {{sentence_lua_Latn}}\nEnglish: "
+include: flores
+task: flores_lua_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lug_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lug_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb9f47bcaf95caaac8de7730dba8f662dac0230c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_lug_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Luganda sentences\
+  \ to English \nLuganda: {{sentence_lug_Latn}}\nEnglish: "
+include: flores
+task: flores_lug_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_luo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_luo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6000ab87662f6753b7dd98d97dc1c057c6c23b58
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_luo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: luo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Luo sentences\
+  \ to English \nLuo: {{sentence_luo_Latn}}\nEnglish: "
+include: flores
+task: flores_luo_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_mos_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_mos_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b72acf36fdc39990bc8d6a91a13e1194ce3d42df
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_mos_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mos_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Mossi sentences\
+  \ to English \nMossi: {{sentence_mos_Latn}}\nEnglish: "
+include: flores
+task: flores_mos_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..028aa75cc17d326bf4d1d85b5c96ff050bb8d78e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Northern Sotho\
+  \ sentences to English \nNorthern Sotho: {{sentence_nso_Latn}}\nEnglish: "
+include: flores
+task: flores_nso_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nus_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nus_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1f9ca54df695d83cc607b4409522ab459c28a99
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nus_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nus_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Nuer sentences\
+  \ to English \nNuer: {{sentence_nus_Latn}}\nEnglish: "
+include: flores
+task: flores_nus_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5ceb01789f8d71651931a85a2b3580381895d97
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Nyanja sentences\
+  \ to English \nNyanja: {{sentence_nya_Latn}}\nEnglish: "
+include: flores
+task: flores_nya_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_plt_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_plt_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2cdace5ed128379cd6093e6da5fee9345ef44c3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_plt_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: plt_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Plateau Malagasy\
+  \ sentences to English \nPlateau Malagasy: {{sentence_plt_Latn}}\nEnglish: "
+include: flores
+task: flores_plt_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_run_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_run_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa4b5bc968c230b50942903d989e30e80cb51f8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_run_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Rundi sentences\
+  \ to English \nRundi: {{sentence_run_Latn}}\nEnglish: "
+include: flores
+task: flores_run_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sag_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sag_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b20eef56654fac2f2f086dcf6e0deea8a59c345d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sag_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sag_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Sango sentences\
+  \ to English \nSango: {{sentence_sag_Latn}}\nEnglish: "
+include: flores
+task: flores_sag_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f0c98f038617264c525edf3d0df5325e05da55ce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Shona sentences\
+  \ to English \nShona: {{sentence_sna_Latn}}\nEnglish: "
+include: flores
+task: flores_sna_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b862c759b912e197cc16acda3cb68d1271d77e0f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_som_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Somali sentences\
+  \ to English \nSomali: {{sentence_som_Latn}}\nEnglish: "
+include: flores
+task: flores_som_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sot_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sot_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5d4e24709a334418b7a23a5d0852f7e5ea665b8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sot_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Southern Sotho\
+  \ sentences to English \nSouthern Sotho: {{sentence_sot_Latn}}\nEnglish: "
+include: flores
+task: flores_sot_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ae236e5cbc21cba7724cb345d78ed20097b351b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Swati sentences\
+  \ to English \nSwati: {{sentence_ssw_Latn}}\nEnglish: "
+include: flores
+task: flores_ssw_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sun_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sun_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a697a2194eaf477a40ca3f56787caaf563d2179
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_sun_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sun_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Sundanese sentences\
+  \ to English \nSundanese: {{sentence_sun_Latn}}\nEnglish: "
+include: flores
+task: flores_sun_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_swh_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_swh_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..06dd9fcc0d384c4926a681e64f1c185c1111fe94
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_swh_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swh_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Swahili sentences\
+  \ to English \nSwahili: {{sentence_swh_Latn}}\nEnglish: "
+include: flores
+task: flores_swh_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5380298e28c3be0c4c9ba536dfdcb685dd7356f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: taq_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tamasheq sentences\
+  \ to English \nTamasheq: {{sentence_taq_Latn}}\nEnglish: "
+include: flores
+task: flores_taq_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7cfb54197cdaffd83c578da681c1b5d36c9f4265
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_taq_Tfng-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: taq_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tamasheq (Tifinagh\
+  \ script) sentences to English \nTamasheq (Tifinagh script): {{sentence_taq_Tfng}}\n\
+  English: "
+include: flores
+task: flores_taq_Tfng-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..56607b6a6e76f921917eb4453b0851dd8a9fb415
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tigrinya sentences\
+  \ to English \nTigrinya: {{sentence_tir_Ethi}}\nEnglish: "
+include: flores
+task: flores_tir_Ethi-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8d04febf4a6ebf564df918c236ede2ccc016b34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Setswana sentences\
+  \ to English \nSetswana: {{sentence_tsn_Latn}}\nEnglish: "
+include: flores
+task: flores_tsn_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c357e9df91da2e9b05faf883128ad9b81028331
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tsonga sentences\
+  \ to English \nTsonga: {{sentence_tso_Latn}}\nEnglish: "
+include: flores
+task: flores_tso_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tum_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tum_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d70a89b24f187643ac4e93dcad084e598385207d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tum_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tum_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tumbuka sentences\
+  \ to English \nTumbuka: {{sentence_tum_Latn}}\nEnglish: "
+include: flores
+task: flores_tum_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_twi_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_twi_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d9dc957751e0c5115f4f8cb9d3bd47cfd3a66d9d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_twi_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Twi sentences\
+  \ to English \nTwi: {{sentence_twi_Latn}}\nEnglish: "
+include: flores
+task: flores_twi_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tzm_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tzm_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81f9c721e731ce51ac8cc8a8adc31225edfc3d59
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_tzm_Tfng-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Central Atlas\
+  \ Tamazight sentences to English \nCentral Atlas Tamazight: {{sentence_tzm_Tfng}}\n\
+  English: "
+include: flores
+task: flores_tzm_Tfng-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_umb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_umb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..983675b039a2ba51232dede065edec9dd7536c75
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_umb_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: umb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Umbundu sentences\
+  \ to English \nUmbundu: {{sentence_umb_Latn}}\nEnglish: "
+include: flores
+task: flores_umb_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f1210fec591a63d43bd3afebd770b844ffd28a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Wolof sentences\
+  \ to English \nWolof: {{sentence_wol_Latn}}\nEnglish: "
+include: flores
+task: flores_wol_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f28e1bb3eed67659b3ac23ef9f97e6cd9c5ba7d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Xhosa sentences\
+  \ to English \nXhosa: {{sentence_xho_Latn}}\nEnglish: "
+include: flores
+task: flores_xho_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e066592660b79c6b5e4d5c6046786a2b118e1eed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Yoruba sentences\
+  \ to English \nYoruba: {{sentence_yor_Latn}}\nEnglish: "
+include: flores
+task: flores_yor_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3b2fef466a1599ed1c5920328031176db342169
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/african-english/flores_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Zulu sentences\
+  \ to English \nZulu: {{sentence_zul_Latn}}\nEnglish: "
+include: flores
+task: flores_zul_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores
new file mode 100644
index 0000000000000000000000000000000000000000..ab71d6563002c5deed46fb73f2b61bd585b7b9ae
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_eng-afr
+- flores_eng-afr_prompt_2
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30890d9150172e44d453679cea878790d1153f95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Arab
+doc_to_target: sentence_ace_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Acehnese (Arabic script) \nEnglish: {{sentence_eng_Latn}} \nAcehnese (Arabic\
+  \ script): "
+include: flores
+task: flores_eng_Latn-ace_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4356785a7d7a7de55ea328e7957ba14764a26745
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ace_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Latn
+doc_to_target: sentence_ace_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Acehnese (Latin script) \nEnglish: {{sentence_eng_Latn}} \nAcehnese (Latin\
+  \ script): "
+include: flores
+task: flores_eng_Latn-ace_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-acq_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-acq_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..630c824e342e8032926204ca41cfdbc6472c35eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-acq_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-acq_Arab
+doc_to_target: sentence_acq_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Ta’izzi-Adeni Arabic \nEnglish: {{sentence_eng_Latn}} \nTa’izzi-Adeni Arabic: "
+include: flores
+task: flores_eng_Latn-acq_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aeb_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aeb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0df4f642f499163c22733c8d1c7397f9949054c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aeb_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aeb_Arab
+doc_to_target: sentence_aeb_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tunisian Arabic \nEnglish: {{sentence_eng_Latn}} \nTunisian Arabic: "
+include: flores
+task: flores_eng_Latn-aeb_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2769adf1ec72d54aab8dd1911b91f14c6c56db7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Afrikaans \nEnglish: {{sentence_eng_Latn}} \nAfrikaans: "
+include: flores
+task: flores_eng_Latn-afr_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aka_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aka_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..624149c7fb2c031e4483050382e7fe12a09ad32f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-aka_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aka_Latn
+doc_to_target: sentence_aka_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Akan \nEnglish: {{sentence_eng_Latn}} \nAkan: "
+include: flores
+task: flores_eng_Latn-aka_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a53e8c2f24cba707d059a83dfa18d3f83791021
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Amharic \nEnglish: {{sentence_eng_Latn}} \nAmharic: "
+include: flores
+task: flores_eng_Latn-amh_Ethi_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ary_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ary_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb814d766327c71ebfd38d4ca046f2484aa3d3d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ary_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ary_Arab
+doc_to_target: sentence_ary_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Moroccan Arabic \nEnglish: {{sentence_eng_Latn}} \nMoroccan Arabic: "
+include: flores
+task: flores_eng_Latn-ary_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-arz_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-arz_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0362666cb7d58d02ed5af9e9429f8b56e1a12d47
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-arz_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-arz_Arab
+doc_to_target: sentence_arz_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Egyptian Arabic \nEnglish: {{sentence_eng_Latn}} \nEgyptian Arabic: "
+include: flores
+task: flores_eng_Latn-arz_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b38459211670064398a117bdb4b5a63c342471d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bam_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bam_Latn
+doc_to_target: sentence_bam_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Bambara \nEnglish: {{sentence_eng_Latn}} \nBambara: "
+include: flores
+task: flores_eng_Latn-bam_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ban_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ban_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cff3c15bd4f226d62932f443416cc5e824dae612
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ban_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ban_Latn
+doc_to_target: sentence_ban_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Balinese \nEnglish: {{sentence_eng_Latn}} \nBalinese: "
+include: flores
+task: flores_eng_Latn-ban_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ef6552a2b4fc29aa64cb6c3e4b3f1304260c9d76
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Bemba \nEnglish: {{sentence_eng_Latn}} \nBemba: "
+include: flores
+task: flores_eng_Latn-bem_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-cjk_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-cjk_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..38c4ea6ff6d0358ddf49b934b4f21549fb7b14d2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-cjk_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-cjk_Latn
+doc_to_target: sentence_cjk_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Chokwe \nEnglish: {{sentence_eng_Latn}} \nChokwe: "
+include: flores
+task: flores_eng_Latn-cjk_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bfcf7180903bace41e71d175a45c65ff68167344
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dik_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dik_Latn
+doc_to_target: sentence_dik_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Southwestern Dinka \nEnglish: {{sentence_eng_Latn}} \nSouthwestern Dinka: "
+include: flores
+task: flores_eng_Latn-dik_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dyu_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dyu_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9fab72b27ebc9d9c9a80dd7b41c0d270f1114e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-dyu_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dyu_Latn
+doc_to_target: sentence_dyu_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Dyula \nEnglish: {{sentence_eng_Latn}} \nDyula: "
+include: flores
+task: flores_eng_Latn-dyu_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ecc34e50ab717b0fa3d8d6608cb952692446f89
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Ewe \nEnglish: {{sentence_eng_Latn}} \nEwe: "
+include: flores
+task: flores_eng_Latn-ewe_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed029237af79c6aaabe9942cb911a556718c014c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fon_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fon_Latn
+doc_to_target: sentence_fon_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Fon \nEnglish: {{sentence_eng_Latn}} \nFon: "
+include: flores
+task: flores_eng_Latn-fon_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d54e66c20d87b05dc59ee76a468f89fa5aca761
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to French \nEnglish: {{sentence_eng_Latn}} \nFrench: "
+include: flores
+task: flores_eng_Latn-fra_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fuv_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fuv_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a821f58fa428af19d22b819428db35a52f4a6725
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-fuv_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fuv_Latn
+doc_to_target: sentence_fuv_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Nigerian Fulfulde \nEnglish: {{sentence_eng_Latn}} \nNigerian Fulfulde: "
+include: flores
+task: flores_eng_Latn-fuv_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-gaz_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-gaz_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36fa1d6c4e1fad7f33e7182abfdca60a8df9d386
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-gaz_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-gaz_Latn
+doc_to_target: sentence_gaz_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Oromo \nEnglish: {{sentence_eng_Latn}} \nOromo: "
+include: flores
+task: flores_eng_Latn-gaz_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aad0a48b3277c8150cb5679b4b8b77636d04b5c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Hausa \nEnglish: {{sentence_eng_Latn}} \nHausa: "
+include: flores
+task: flores_eng_Latn-hau_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b31e37cd4ce5a6891f7dff30ff75f05b99bdc48c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Igbo \nEnglish: {{sentence_eng_Latn}} \nIgbo: "
+include: flores
+task: flores_eng_Latn-ibo_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kab_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kab_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d6cfd8cb97ca07352c0d7927bc2476a3e9e378a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kab_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kab_Latn
+doc_to_target: sentence_kab_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kabyle \nEnglish: {{sentence_eng_Latn}} \nKabyle: "
+include: flores
+task: flores_eng_Latn-kab_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd2da95c49b7828dfbc174d6a9d891546d433ecd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kam_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kam_Latn
+doc_to_target: sentence_kam_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kamba \nEnglish: {{sentence_eng_Latn}} \nKamba: "
+include: flores
+task: flores_eng_Latn-kam_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kbp_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kbp_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b04cbdf144d5a9718f5a1f9ae38158952e6975e7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kbp_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kbp_Latn
+doc_to_target: sentence_kbp_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kabiyè \nEnglish: {{sentence_eng_Latn}} \nKabiyè: "
+include: flores
+task: flores_eng_Latn-kbp_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kea_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kea_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a67cb9fef15715713918aaafe29f1147f40acda
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kea_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kea_Latn
+doc_to_target: sentence_kea_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kabuverdianu \nEnglish: {{sentence_eng_Latn}} \nKabuverdianu: "
+include: flores
+task: flores_eng_Latn-kea_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1519f36e63c76ba57759547e91bab111c3796dcf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kik_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kik_Latn
+doc_to_target: sentence_kik_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kikuyu \nEnglish: {{sentence_eng_Latn}} \nKikuyu: "
+include: flores
+task: flores_eng_Latn-kik_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b33033ff00dce959d37f6b7fe8f0440a6cd1577
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kinyarwanda \nEnglish: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: flores
+task: flores_eng_Latn-kin_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kmb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kmb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..803989174a43ad7567cc321f7f7847039bc516d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kmb_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kmb_Latn
+doc_to_target: sentence_kmb_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kimbundu \nEnglish: {{sentence_eng_Latn}} \nKimbundu: "
+include: flores
+task: flores_eng_Latn-kmb_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0d262413539f659922105528184c6b1f9c74f05
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Arab
+doc_to_target: sentence_knc_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Central Kanuri (Arabic script) \nEnglish: {{sentence_eng_Latn}} \nCentral Kanuri\
+  \ (Arabic script): "
+include: flores
+task: flores_eng_Latn-knc_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..61ea7a2cdf03e9cd2e6fcef2abcc6e072cb5f430
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-knc_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Latn
+doc_to_target: sentence_knc_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Central Kanuri (Latin script) \nEnglish: {{sentence_eng_Latn}} \nCentral Kanuri\
+  \ (Latin script): "
+include: flores
+task: flores_eng_Latn-knc_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1967452e0032b023b48dfd3980e9d8241aed8e09
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-kon_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kon_Latn
+doc_to_target: sentence_kon_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kikongo \nEnglish: {{sentence_eng_Latn}} \nKikongo: "
+include: flores
+task: flores_eng_Latn-kon_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05e2593bdee5d218324f959277480f74db95a82b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lin_Latn
+doc_to_target: sentence_lin_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Lingala \nEnglish: {{sentence_eng_Latn}} \nLingala: "
+include: flores
+task: flores_eng_Latn-lin_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lua_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lua_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f4fe01e16cf1715dbf8467e9bb6fb1558f4b923
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lua_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lua_Latn
+doc_to_target: sentence_lua_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Luba-Kasai \nEnglish: {{sentence_eng_Latn}} \nLuba-Kasai: "
+include: flores
+task: flores_eng_Latn-lua_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lug_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lug_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cfc35568598cd7733748a5f06fa5a1ad5c7c85e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-lug_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lug_Latn
+doc_to_target: sentence_lug_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Luganda \nEnglish: {{sentence_eng_Latn}} \nLuganda: "
+include: flores
+task: flores_eng_Latn-lug_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-luo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-luo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05c027bb0256d1a22e1c14ca2812b6f9abb65fb7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-luo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-luo_Latn
+doc_to_target: sentence_luo_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Luo \nEnglish: {{sentence_eng_Latn}} \nLuo: "
+include: flores
+task: flores_eng_Latn-luo_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-mos_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-mos_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a676522a51603f951c5dfe0d88a6d99823b46eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-mos_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-mos_Latn
+doc_to_target: sentence_mos_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Mossi \nEnglish: {{sentence_eng_Latn}} \nMossi: "
+include: flores
+task: flores_eng_Latn-mos_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c681b492c17f5e95709c9f0bd06637b10c07c9c3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Northern Sotho \nEnglish: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: flores
+task: flores_eng_Latn-nso_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nus_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nus_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ae375058b9393357df2aca5f52c69d6b6fde744
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nus_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nus_Latn
+doc_to_target: sentence_nus_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Nuer \nEnglish: {{sentence_eng_Latn}} \nNuer: "
+include: flores
+task: flores_eng_Latn-nus_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..135029028e124537ec4b2dab4222fcb582d38beb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Nyanja \nEnglish: {{sentence_eng_Latn}} \nNyanja: "
+include: flores
+task: flores_eng_Latn-nya_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-plt_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-plt_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..faa85197438e9ff31744de7683957736b3ad34bc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-plt_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-plt_Latn
+doc_to_target: sentence_plt_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Plateau Malagasy \nEnglish: {{sentence_eng_Latn}} \nPlateau Malagasy: "
+include: flores
+task: flores_eng_Latn-plt_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-run_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-run_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b670e3f7146cec72c15c9be17ad0df6b30a1a4b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-run_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-run_Latn
+doc_to_target: sentence_run_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Rundi \nEnglish: {{sentence_eng_Latn}} \nRundi: "
+include: flores
+task: flores_eng_Latn-run_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sag_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sag_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32f399391b905b77d5bea93229fc6b4c5de9e533
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sag_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sag_Latn
+doc_to_target: sentence_sag_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Sango \nEnglish: {{sentence_eng_Latn}} \nSango: "
+include: flores
+task: flores_eng_Latn-sag_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e219c40275fb7938cc2a121822b534776aff57b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Shona \nEnglish: {{sentence_eng_Latn}} \nShona: "
+include: flores
+task: flores_eng_Latn-sna_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f87466dc875c1b2402ccd195f164949c94aa3e5e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-som_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Somali \nEnglish: {{sentence_eng_Latn}} \nSomali: "
+include: flores
+task: flores_eng_Latn-som_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sot_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sot_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..674d162b64e72d5c3d58521643a8dae6042b9cf5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sot_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sot_Latn
+doc_to_target: sentence_sot_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Southern Sotho \nEnglish: {{sentence_eng_Latn}} \nSouthern Sotho: "
+include: flores
+task: flores_eng_Latn-sot_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23b9216f912ba5cd340181dd5c07b19c4ff03c7d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Swati \nEnglish: {{sentence_eng_Latn}} \nSwati: "
+include: flores
+task: flores_eng_Latn-ssw_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sun_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sun_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f51ced5e6ce5353da73945815adbaab9e9c0d94
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-sun_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sun_Latn
+doc_to_target: sentence_sun_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Sundanese \nEnglish: {{sentence_eng_Latn}} \nSundanese: "
+include: flores
+task: flores_eng_Latn-sun_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-swh_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-swh_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1558af98e0011ddf66f5ec63bcde425414a539a2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-swh_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-swh_Latn
+doc_to_target: sentence_swh_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Swahili \nEnglish: {{sentence_eng_Latn}} \nSwahili: "
+include: flores
+task: flores_eng_Latn-swh_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b09b52f46c56e15fc30aff90cbac8c8b8f8e2b8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Latn
+doc_to_target: sentence_taq_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tamasheq \nEnglish: {{sentence_eng_Latn}} \nTamasheq: "
+include: flores
+task: flores_eng_Latn-taq_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b69f1dbd4dd96f81e055b758bfc103a81f7c116c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-taq_Tfng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Tfng
+doc_to_target: sentence_taq_Tfng
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tamasheq (Tifinagh script) \nEnglish: {{sentence_eng_Latn}} \nTamasheq (Tifinagh\
+  \ script): "
+include: flores
+task: flores_eng_Latn-taq_Tfng_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4340591d8e397caa4525832e1225b364574c4664
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tigrinya \nEnglish: {{sentence_eng_Latn}} \nTigrinya: "
+include: flores
+task: flores_eng_Latn-tir_Ethi_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e592366ebb36a4c621f05e31326fbf36125025b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Setswana \nEnglish: {{sentence_eng_Latn}} \nSetswana: "
+include: flores
+task: flores_eng_Latn-tsn_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d027a2aa2fc08aaa9fa792391eb69d46ddee802
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tso_Latn
+doc_to_target: sentence_tso_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tsonga \nEnglish: {{sentence_eng_Latn}} \nTsonga: "
+include: flores
+task: flores_eng_Latn-tso_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tum_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tum_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1accaeaf4bd9cbbfb5c46c6341a3bb81663767be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tum_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tum_Latn
+doc_to_target: sentence_tum_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tumbuka \nEnglish: {{sentence_eng_Latn}} \nTumbuka: "
+include: flores
+task: flores_eng_Latn-tum_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-twi_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-twi_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a45df82e6c60141396deafa19cd7882b2edb689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-twi_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-twi_Latn
+doc_to_target: sentence_twi_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Twi \nEnglish: {{sentence_eng_Latn}} \nTwi: "
+include: flores
+task: flores_eng_Latn-twi_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tzm_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tzm_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a3faa15d24df79d839a226cc374ace410133a12
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-tzm_Tfng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tzm_Tfng
+doc_to_target: sentence_tzm_Tfng
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Central Atlas Tamazight \nEnglish: {{sentence_eng_Latn}} \nCentral Atlas Tamazight: "
+include: flores
+task: flores_eng_Latn-tzm_Tfng_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-umb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-umb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f21c6fe1939f288d56b6bd1229ce6755babb807
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-umb_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-umb_Latn
+doc_to_target: sentence_umb_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Umbundu \nEnglish: {{sentence_eng_Latn}} \nUmbundu: "
+include: flores
+task: flores_eng_Latn-umb_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..263ded277f0e1a596eac0bf1af5ab1858cf6cd42
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Wolof \nEnglish: {{sentence_eng_Latn}} \nWolof: "
+include: flores
+task: flores_eng_Latn-wol_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a92e46f996b3dfacb06bd5d8d589d43763800e1a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Xhosa \nEnglish: {{sentence_eng_Latn}} \nXhosa: "
+include: flores
+task: flores_eng_Latn-xho_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..80ec895c70fc2f09261ec6df48f2e6bc9755f479
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Yoruba \nEnglish: {{sentence_eng_Latn}} \nYoruba: "
+include: flores
+task: flores_eng_Latn-yor_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..593cdfe3c7c6878bef06e9af476476fdcbbfdfd6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/english-african/flores_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Zulu \nEnglish: {{sentence_eng_Latn}} \nZulu: "
+include: flores
+task: flores_eng_Latn-zul_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/flores/prompt_2/flores b/lm_eval/tasks/afrobench/flores/prompt_2/flores
new file mode 100644
index 0000000000000000000000000000000000000000..74f9f33eb22662bec79709bd64d8d31f3fb8eae0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_2/flores
@@ -0,0 +1,24 @@
+tag:
+- flores_tasks
+- flores_afr-eng
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores
new file mode 100644
index 0000000000000000000000000000000000000000..60bf41116e43ccdd17efcdcbe0e72c8aad0cf684
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_afr-eng
+- flores_afr-eng_prompt_3
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee5f12704a3a7f03a52aba093e01e335a98729ff
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ace_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Acehnese (Arabic script) and English linguist, translate the following\
+  \ Acehnese (Arabic script) sentences to English \nAcehnese (Arabic script): {{sentence_ace_Arab}}\n\
+  English: "
+include: flores
+task: flores_ace_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1d70ba341b8e30123cfd2885f570e5050c359a5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ace_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ace_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Acehnese (Latin script) and English linguist, translate the following\
+  \ Acehnese (Latin script) sentences to English \nAcehnese (Latin script): {{sentence_ace_Latn}}\n\
+  English: "
+include: flores
+task: flores_ace_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_acq_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_acq_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8cda39626e72a3df883f1b74c314e8c275fa4fbb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_acq_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: acq_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Ta’izzi-Adeni Arabic and English linguist, translate the following\
+  \ Ta’izzi-Adeni Arabic sentences to English \nTa’izzi-Adeni Arabic: {{sentence_acq_Arab}}\n\
+  English: "
+include: flores
+task: flores_acq_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aeb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aeb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97f8ef2c91bd0255f2b887ff6b1acb75fc1c0487
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aeb_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aeb_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tunisian Arabic and English linguist, translate the following Tunisian\
+  \ Arabic sentences to English \nTunisian Arabic: {{sentence_aeb_Arab}}\nEnglish: "
+include: flores
+task: flores_aeb_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e228cb9c66858d173835016566cd1f4731038120
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Afrikaans and English linguist, translate the following Afrikaans\
+  \ sentences to English \nAfrikaans: {{sentence_afr_Latn}}\nEnglish: "
+include: flores
+task: flores_afr_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aka_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aka_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d6fc38582c415828023478f33f2925427a68cbb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_aka_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aka_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Akan and English linguist, translate the following Akan sentences\
+  \ to English \nAkan: {{sentence_aka_Latn}}\nEnglish: "
+include: flores
+task: flores_aka_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58f33f9a13c5c9840fd4dcdcdd12c664daf60878
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Amharic and English linguist, translate the following Amharic sentences\
+  \ to English \nAmharic: {{sentence_amh_Ethi}}\nEnglish: "
+include: flores
+task: flores_amh_Ethi-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ary_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ary_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3006ebf72c0088340346a3a7b8f140da84211048
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ary_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ary_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Moroccan Arabic and English linguist, translate the following Moroccan\
+  \ Arabic sentences to English \nMoroccan Arabic: {{sentence_ary_Arab}}\nEnglish: "
+include: flores
+task: flores_ary_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_arz_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_arz_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..46cc0a18d4633b7032c7c179606845941f595e8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_arz_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arz_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Egyptian Arabic and English linguist, translate the following Egyptian\
+  \ Arabic sentences to English \nEgyptian Arabic: {{sentence_arz_Arab}}\nEnglish: "
+include: flores
+task: flores_arz_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c50a8dfa4ae3b2a99c2bb40f64749fb22c8928ac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bam_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Bambara and English linguist, translate the following Bambara sentences\
+  \ to English \nBambara: {{sentence_bam_Latn}}\nEnglish: "
+include: flores
+task: flores_bam_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ban_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ban_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86f2eed3fef3b2aefef9f0a7e640310d054e3fc9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ban_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ban_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Balinese and English linguist, translate the following Balinese\
+  \ sentences to English \nBalinese: {{sentence_ban_Latn}}\nEnglish: "
+include: flores
+task: flores_ban_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55c32fe9c5e3f4321b6c3145d862d23f48da233b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Bemba and English linguist, translate the following Bemba sentences\
+  \ to English \nBemba: {{sentence_bem_Latn}}\nEnglish: "
+include: flores
+task: flores_bem_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_cjk_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_cjk_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..642cd4dda88f9c7e38092fbc650e8340ff8998a2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_cjk_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: cjk_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Chokwe and English linguist, translate the following Chokwe sentences\
+  \ to English \nChokwe: {{sentence_cjk_Latn}}\nEnglish: "
+include: flores
+task: flores_cjk_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8005a642241e9ba4e9255a224f6c7d641553edd7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dik_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: dik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Southwestern Dinka and English linguist, translate the following\
+  \ Southwestern Dinka sentences to English \nSouthwestern Dinka: {{sentence_dik_Latn}}\n\
+  English: "
+include: flores
+task: flores_dik_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dyu_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dyu_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a99efc0867c365186db75a8f04a0fbfa741f91c2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_dyu_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dyu_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Dyula and English linguist, translate the following Dyula sentences\
+  \ to English \nDyula: {{sentence_dyu_Latn}}\nEnglish: "
+include: flores
+task: flores_dyu_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77133ad60cc57992882213908c1abe2300fae291
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Ewe and English linguist, translate the following Ewe sentences\
+  \ to English \nEwe: {{sentence_ewe_Latn}}\nEnglish: "
+include: flores
+task: flores_ewe_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..436bf4ac3e5319e8cf2da5791607f8d4f7564eca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fon_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Fon and English linguist, translate the following Fon sentences\
+  \ to English \nFon: {{sentence_fon_Latn}}\nEnglish: "
+include: flores
+task: flores_fon_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b10c46e3226342c3a01c90f881df8575049eb6b6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a French and English linguist, translate the following French sentences\
+  \ to English \nFrench: {{sentence_fra_Latn}}\nEnglish: "
+include: flores
+task: flores_fra_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fuv_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fuv_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ffcbd3c04f1f6fd608e11286be4d88c079890a88
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_fuv_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fuv_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Nigerian Fulfulde and English linguist, translate the following\
+  \ Nigerian Fulfulde sentences to English \nNigerian Fulfulde: {{sentence_fuv_Latn}}\n\
+  English: "
+include: flores
+task: flores_fuv_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_gaz_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_gaz_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..703cd3517a81e172683fb43b91ddbb4ca7db500e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_gaz_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: gaz_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Oromo and English linguist, translate the following Oromo sentences\
+  \ to English \nOromo: {{sentence_gaz_Latn}}\nEnglish: "
+include: flores
+task: flores_gaz_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7527bf78ebc88167a99707eb3101b1c350e5c991
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Hausa and English linguist, translate the following Hausa sentences\
+  \ to English \nHausa: {{sentence_hau_Latn}}\nEnglish: "
+include: flores
+task: flores_hau_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7705911a67b2584a9d1afc3cd5c4294a37a22ece
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Igbo and English linguist, translate the following Igbo sentences\
+  \ to English \nIgbo: {{sentence_ibo_Latn}}\nEnglish: "
+include: flores
+task: flores_ibo_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kab_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kab_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec406c5e0fbf8f5b41b17e432586c00f8383eabd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kab_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kab_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kabyle and English linguist, translate the following Kabyle sentences\
+  \ to English \nKabyle: {{sentence_kab_Latn}}\nEnglish: "
+include: flores
+task: flores_kab_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kam_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kam_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed27b6d79c71b5c1f4690cd409a86aabf9901124
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kam_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kam_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kamba and English linguist, translate the following Kamba sentences\
+  \ to English \nKamba: {{sentence_kam_Latn}}\nEnglish: "
+include: flores
+task: flores_kam_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kbp_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kbp_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c1a0961e08908cf1e846a87e7ddce3641e80ead
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kbp_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kbp_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kabiyè and English linguist, translate the following Kabiyè sentences\
+  \ to English \nKabiyè: {{sentence_kbp_Latn}}\nEnglish: "
+include: flores
+task: flores_kbp_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kea_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kea_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67dd9e73fa327338fecec80728ac645f78996b92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kea_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kea_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kabuverdianu and English linguist, translate the following Kabuverdianu\
+  \ sentences to English \nKabuverdianu: {{sentence_kea_Latn}}\nEnglish: "
+include: flores
+task: flores_kea_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kik_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kik_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14a6be5dfd5c86c44c8f3acb23af0f36d5445ead
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kik_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kik_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kikuyu and English linguist, translate the following Kikuyu sentences\
+  \ to English \nKikuyu: {{sentence_kik_Latn}}\nEnglish: "
+include: flores
+task: flores_kik_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bb14aedf4e95de93a3d4da32a07b19bf854b697
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kinyarwanda and English linguist, translate the following Kinyarwanda\
+  \ sentences to English \nKinyarwanda: {{sentence_kin_Latn}}\nEnglish: "
+include: flores
+task: flores_kin_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kmb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kmb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c31ede4dfe8449d2a1c8e84b74eeee0fdc908b78
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kmb_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kmb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kimbundu and English linguist, translate the following Kimbundu\
+  \ sentences to English \nKimbundu: {{sentence_kmb_Latn}}\nEnglish: "
+include: flores
+task: flores_kmb_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8c7f8095e11d407d75360ee5e4794e45fc17eeb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: knc_Arab-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Central Kanuri (Arabic script) and English linguist, translate\
+  \ the following Central Kanuri (Arabic script) sentences to English \nCentral Kanuri\
+  \ (Arabic script): {{sentence_knc_Arab}}\nEnglish: "
+include: flores
+task: flores_knc_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9621de73d33ea37feeb3a4face5dbe50812bf9ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_knc_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: knc_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Central Kanuri (Latin script) and English linguist, translate the\
+  \ following Central Kanuri (Latin script) sentences to English \nCentral Kanuri\
+  \ (Latin script): {{sentence_knc_Latn}}\nEnglish: "
+include: flores
+task: flores_knc_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kon_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kon_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54ede3a6e2c0280761a3af529cc0a8fe82d2f518
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_kon_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kon_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kikongo and English linguist, translate the following Kikongo sentences\
+  \ to English \nKikongo: {{sentence_kon_Latn}}\nEnglish: "
+include: flores
+task: flores_kon_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea7e736d949726bf2296091e4309a12d493dbe4f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Lingala and English linguist, translate the following Lingala sentences\
+  \ to English \nLingala: {{sentence_lin_Latn}}\nEnglish: "
+include: flores
+task: flores_lin_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lua_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lua_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..327f014489f7502ac062537c72d4846a7099b64d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lua_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lua_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Luba-Kasai and English linguist, translate the following Luba-Kasai\
+  \ sentences to English \nLuba-Kasai: {{sentence_lua_Latn}}\nEnglish: "
+include: flores
+task: flores_lua_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lug_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lug_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bfa92fa280f98278f7735634231cb99d44bc71f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_lug_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Luganda and English linguist, translate the following Luganda sentences\
+  \ to English \nLuganda: {{sentence_lug_Latn}}\nEnglish: "
+include: flores
+task: flores_lug_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_luo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_luo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a66fded383a914454aed5e904278e60bf85d1e62
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_luo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: luo_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Luo and English linguist, translate the following Luo sentences\
+  \ to English \nLuo: {{sentence_luo_Latn}}\nEnglish: "
+include: flores
+task: flores_luo_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_mos_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_mos_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e428853bf22859e37bbd10dcd4e113188b7519ef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_mos_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mos_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Mossi and English linguist, translate the following Mossi sentences\
+  \ to English \nMossi: {{sentence_mos_Latn}}\nEnglish: "
+include: flores
+task: flores_mos_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..054aa409b729cc70d70b1d53a49945f92096f4b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Northern Sotho and English linguist, translate the following Northern\
+  \ Sotho sentences to English \nNorthern Sotho: {{sentence_nso_Latn}}\nEnglish: "
+include: flores
+task: flores_nso_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nus_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nus_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3e0d1e3ac8ff35a2b0c89f37cb2697a6d15299a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nus_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nus_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Nuer and English linguist, translate the following Nuer sentences\
+  \ to English \nNuer: {{sentence_nus_Latn}}\nEnglish: "
+include: flores
+task: flores_nus_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e23c57c6807991e1b509c0a03a7c93a23eac3015
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Nyanja and English linguist, translate the following Nyanja sentences\
+  \ to English \nNyanja: {{sentence_nya_Latn}}\nEnglish: "
+include: flores
+task: flores_nya_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_plt_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_plt_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3ddfd864c3830d4921e1fcda79155c733809b305
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_plt_Latn-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: plt_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Plateau Malagasy and English linguist, translate the following\
+  \ Plateau Malagasy sentences to English \nPlateau Malagasy: {{sentence_plt_Latn}}\n\
+  English: "
+include: flores
+task: flores_plt_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_run_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_run_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64a82f716b71950711e6b055a6e30f45356a082c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_run_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Rundi and English linguist, translate the following Rundi sentences\
+  \ to English \nRundi: {{sentence_run_Latn}}\nEnglish: "
+include: flores
+task: flores_run_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sag_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sag_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48408f94054fee78fe7c3be6460de563e9e60f0a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sag_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sag_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Sango and English linguist, translate the following Sango sentences\
+  \ to English \nSango: {{sentence_sag_Latn}}\nEnglish: "
+include: flores
+task: flores_sag_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff1626419b69a8e94349bfd63093ad33914bbcec
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Shona and English linguist, translate the following Shona sentences\
+  \ to English \nShona: {{sentence_sna_Latn}}\nEnglish: "
+include: flores
+task: flores_sna_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e27e2a5b3d1754f4c47755a1b92c7ac95938e67
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_som_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Somali and English linguist, translate the following Somali sentences\
+  \ to English \nSomali: {{sentence_som_Latn}}\nEnglish: "
+include: flores
+task: flores_som_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sot_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sot_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc70b6f62b317e27cd8c00d95d89e103945028dc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sot_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Southern Sotho and English linguist, translate the following Southern\
+  \ Sotho sentences to English \nSouthern Sotho: {{sentence_sot_Latn}}\nEnglish: "
+include: flores
+task: flores_sot_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cd61ae8e8ceb6b9a0f985457f26d25961272036
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Swati and English linguist, translate the following Swati sentences\
+  \ to English \nSwati: {{sentence_ssw_Latn}}\nEnglish: "
+include: flores
+task: flores_ssw_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sun_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sun_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..000108f77ea1a0b578c1ca980ed0d74490f24fdf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_sun_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sun_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Sundanese and English linguist, translate the following Sundanese\
+  \ sentences to English \nSundanese: {{sentence_sun_Latn}}\nEnglish: "
+include: flores
+task: flores_sun_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_swh_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_swh_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c81805c1f22b9e6c6bd55ba925fa7dfb80f0cf1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_swh_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swh_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Swahili and English linguist, translate the following Swahili sentences\
+  \ to English \nSwahili: {{sentence_swh_Latn}}\nEnglish: "
+include: flores
+task: flores_swh_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6febb3004bc3f39f94287ac41a9883d95f056fe1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: taq_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tamasheq and English linguist, translate the following Tamasheq\
+  \ sentences to English \nTamasheq: {{sentence_taq_Latn}}\nEnglish: "
+include: flores
+task: flores_taq_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6290ab94d3be2e52750f9af5900d7c27f27cb5af
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_taq_Tfng-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: taq_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tamasheq (Tifinagh script) and English linguist, translate the\
+  \ following Tamasheq (Tifinagh script) sentences to English \nTamasheq (Tifinagh\
+  \ script): {{sentence_taq_Tfng}}\nEnglish: "
+include: flores
+task: flores_taq_Tfng-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60133a3b735a0b1d91901bdd7f1ef2122b0f0f03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tigrinya and English linguist, translate the following Tigrinya\
+  \ sentences to English \nTigrinya: {{sentence_tir_Ethi}}\nEnglish: "
+include: flores
+task: flores_tir_Ethi-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40417bde77b4a0ede2e9c87c56668be101579f3b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Setswana and English linguist, translate the following Setswana\
+  \ sentences to English \nSetswana: {{sentence_tsn_Latn}}\nEnglish: "
+include: flores
+task: flores_tsn_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..56d4632500b86964d0d665f1827cd129bc508d63
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tso_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tsonga and English linguist, translate the following Tsonga sentences\
+  \ to English \nTsonga: {{sentence_tso_Latn}}\nEnglish: "
+include: flores
+task: flores_tso_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tum_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tum_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc4bb541f7a5692a067ad75f5e3a86490487cc70
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tum_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tum_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tumbuka and English linguist, translate the following Tumbuka sentences\
+  \ to English \nTumbuka: {{sentence_tum_Latn}}\nEnglish: "
+include: flores
+task: flores_tum_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_twi_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_twi_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cc0d674c8ced1f005eef5a94c87a886a6f176aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_twi_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Twi and English linguist, translate the following Twi sentences\
+  \ to English \nTwi: {{sentence_twi_Latn}}\nEnglish: "
+include: flores
+task: flores_twi_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tzm_Tfng-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tzm_Tfng-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3575ccb2a766a722cbc88880f34656af8cdb3a4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_tzm_Tfng-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Central Atlas Tamazight and English linguist, translate the following\
+  \ Central Atlas Tamazight sentences to English \nCentral Atlas Tamazight: {{sentence_tzm_Tfng}}\n\
+  English: "
+include: flores
+task: flores_tzm_Tfng-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_umb_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_umb_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7df76cf07cb4bf8f772721136fd4d92280b3820
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_umb_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: umb_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Umbundu and English linguist, translate the following Umbundu sentences\
+  \ to English \nUmbundu: {{sentence_umb_Latn}}\nEnglish: "
+include: flores
+task: flores_umb_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22275ca15cd1829db481c0c77363a10649be101f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Wolof and English linguist, translate the following Wolof sentences\
+  \ to English \nWolof: {{sentence_wol_Latn}}\nEnglish: "
+include: flores
+task: flores_wol_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85ae368b6efa8dab3a8a2b110e438b043ef3c74f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Xhosa and English linguist, translate the following Xhosa sentences\
+  \ to English \nXhosa: {{sentence_xho_Latn}}\nEnglish: "
+include: flores
+task: flores_xho_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5bbd8eb967ea1f4c6afe09bef65e379c4fed9c25
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Yoruba and English linguist, translate the following Yoruba sentences\
+  \ to English \nYoruba: {{sentence_yor_Latn}}\nEnglish: "
+include: flores
+task: flores_yor_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea2c2edb8439fffb452187b86ed1690501b20b3d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/african-english/flores_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn-eng_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Zulu and English linguist, translate the following Zulu sentences\
+  \ to English \nZulu: {{sentence_zul_Latn}}\nEnglish: "
+include: flores
+task: flores_zul_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores
new file mode 100644
index 0000000000000000000000000000000000000000..ac7dc1651e4729ae0357c6d958745400ddc35ea1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores
@@ -0,0 +1,27 @@
+tag:
+- african_flores_tasks
+- flores_eng-afr
+- flores_eng-afr_prompt_3
+- afrobench_MT_tasks
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: devtest
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "**"
+    - </s>
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53cf711fa19132b8668d1c4a6024e1b96f54751b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Arab
+doc_to_target: sentence_ace_Arab
+doc_to_text: "As a Acehnese (Arabic script) and English linguist, translate the following\
+  \ English sentences to Acehnese (Arabic script) \nEnglish: {{sentence_eng_Latn}}\
+  \ \nAcehnese (Arabic script): "
+include: flores
+task: flores_eng_Latn-ace_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..766b7c30061e8adfd3e4827052fba7160483ae4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ace_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ace_Latn
+doc_to_target: sentence_ace_Latn
+doc_to_text: "As a Acehnese (Latin script) and English linguist, translate the following\
+  \ English sentences to Acehnese (Latin script) \nEnglish: {{sentence_eng_Latn}}\
+  \ \nAcehnese (Latin script): "
+include: flores
+task: flores_eng_Latn-ace_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-acq_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-acq_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e809c866eb602e76defc6c3fca983e02bc213a52
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-acq_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-acq_Arab
+doc_to_target: sentence_acq_Arab
+doc_to_text: "As a Ta’izzi-Adeni Arabic and English linguist, translate the following\
+  \ English sentences to Ta’izzi-Adeni Arabic \nEnglish: {{sentence_eng_Latn}} \n\
+  Ta’izzi-Adeni Arabic: "
+include: flores
+task: flores_eng_Latn-acq_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aeb_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aeb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e8263fe6af0b65e5c935c7d56146b6940c6850b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aeb_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aeb_Arab
+doc_to_target: sentence_aeb_Arab
+doc_to_text: "As a Tunisian Arabic and English linguist, translate the following English\
+  \ sentences to Tunisian Arabic \nEnglish: {{sentence_eng_Latn}} \nTunisian Arabic: "
+include: flores
+task: flores_eng_Latn-aeb_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86421c268959192fae2dcbc19a1a4b935d6bff29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "As a Afrikaans and English linguist, translate the following English\
+  \ sentences to Afrikaans \nEnglish: {{sentence_eng_Latn}} \nAfrikaans: "
+include: flores
+task: flores_eng_Latn-afr_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aka_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aka_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3373390566a317a7438e216cea67b926d5dd20fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-aka_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-aka_Latn
+doc_to_target: sentence_aka_Latn
+doc_to_text: "As a Akan and English linguist, translate the following English sentences\
+  \ to Akan \nEnglish: {{sentence_eng_Latn}} \nAkan: "
+include: flores
+task: flores_eng_Latn-aka_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba3e0116586dfb106bc57103dc685ddd8856570d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "As a Amharic and English linguist, translate the following English sentences\
+  \ to Amharic \nEnglish: {{sentence_eng_Latn}} \nAmharic: "
+include: flores
+task: flores_eng_Latn-amh_Ethi_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ary_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ary_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c732756a2ea06e33114c117c630f9b3fccab32fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ary_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ary_Arab
+doc_to_target: sentence_ary_Arab
+doc_to_text: "As a Moroccan Arabic and English linguist, translate the following English\
+  \ sentences to Moroccan Arabic \nEnglish: {{sentence_eng_Latn}} \nMoroccan Arabic: "
+include: flores
+task: flores_eng_Latn-ary_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-arz_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-arz_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f11bc38a2dc1c1cc565979a47b51b5aad9bc830e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-arz_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-arz_Arab
+doc_to_target: sentence_arz_Arab
+doc_to_text: "As a Egyptian Arabic and English linguist, translate the following English\
+  \ sentences to Egyptian Arabic \nEnglish: {{sentence_eng_Latn}} \nEgyptian Arabic: "
+include: flores
+task: flores_eng_Latn-arz_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c762962832885fe21c75986f7ce006789217dbd4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bam_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bam_Latn
+doc_to_target: sentence_bam_Latn
+doc_to_text: "As a Bambara and English linguist, translate the following English sentences\
+  \ to Bambara \nEnglish: {{sentence_eng_Latn}} \nBambara: "
+include: flores
+task: flores_eng_Latn-bam_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ban_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ban_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..601aecf5cebebdb6572fadf8f82d2963b9b87d5c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ban_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ban_Latn
+doc_to_target: sentence_ban_Latn
+doc_to_text: "As a Balinese and English linguist, translate the following English\
+  \ sentences to Balinese \nEnglish: {{sentence_eng_Latn}} \nBalinese: "
+include: flores
+task: flores_eng_Latn-ban_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fadabdb9356f28fda88a68e69646a0fd60141e9c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "As a Bemba and English linguist, translate the following English sentences\
+  \ to Bemba \nEnglish: {{sentence_eng_Latn}} \nBemba: "
+include: flores
+task: flores_eng_Latn-bem_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-cjk_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-cjk_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c522831373d25e05103918ba43f36183106cc509
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-cjk_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-cjk_Latn
+doc_to_target: sentence_cjk_Latn
+doc_to_text: "As a Chokwe and English linguist, translate the following English sentences\
+  \ to Chokwe \nEnglish: {{sentence_eng_Latn}} \nChokwe: "
+include: flores
+task: flores_eng_Latn-cjk_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..acfeb83ad758573c632ca1b3e9e08f190b86fa30
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dik_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dik_Latn
+doc_to_target: sentence_dik_Latn
+doc_to_text: "As a Southwestern Dinka and English linguist, translate the following\
+  \ English sentences to Southwestern Dinka \nEnglish: {{sentence_eng_Latn}} \nSouthwestern\
+  \ Dinka: "
+include: flores
+task: flores_eng_Latn-dik_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dyu_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dyu_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..796dc6d2f633c5baba22d2cce8592f0f01e3fe42
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-dyu_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-dyu_Latn
+doc_to_target: sentence_dyu_Latn
+doc_to_text: "As a Dyula and English linguist, translate the following English sentences\
+  \ to Dyula \nEnglish: {{sentence_eng_Latn}} \nDyula: "
+include: flores
+task: flores_eng_Latn-dyu_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..31a07891820793360f26b2d093e98b5982816ea6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "As a Ewe and English linguist, translate the following English sentences\
+  \ to Ewe \nEnglish: {{sentence_eng_Latn}} \nEwe: "
+include: flores
+task: flores_eng_Latn-ewe_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cdc7308d63891ed0fc65e779e394b71eafb3bb1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fon_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fon_Latn
+doc_to_target: sentence_fon_Latn
+doc_to_text: "As a Fon and English linguist, translate the following English sentences\
+  \ to Fon \nEnglish: {{sentence_eng_Latn}} \nFon: "
+include: flores
+task: flores_eng_Latn-fon_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3896879db152bc1e583fd24f5823321d0f6eda4d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "As a French and English linguist, translate the following English sentences\
+  \ to French \nEnglish: {{sentence_eng_Latn}} \nFrench: "
+include: flores
+task: flores_eng_Latn-fra_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fuv_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fuv_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b63249be8c1e81e837e9a024dd19ecd822f748b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-fuv_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-fuv_Latn
+doc_to_target: sentence_fuv_Latn
+doc_to_text: "As a Nigerian Fulfulde and English linguist, translate the following\
+  \ English sentences to Nigerian Fulfulde \nEnglish: {{sentence_eng_Latn}} \nNigerian\
+  \ Fulfulde: "
+include: flores
+task: flores_eng_Latn-fuv_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-gaz_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-gaz_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95cde87c38c66448967d595f60709c2f908af5f0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-gaz_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-gaz_Latn
+doc_to_target: sentence_gaz_Latn
+doc_to_text: "As a Oromo and English linguist, translate the following English sentences\
+  \ to Oromo \nEnglish: {{sentence_eng_Latn}} \nOromo: "
+include: flores
+task: flores_eng_Latn-gaz_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eec82e34503bb64fcab1c90cf507499760f95a15
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "As a Hausa and English linguist, translate the following English sentences\
+  \ to Hausa \nEnglish: {{sentence_eng_Latn}} \nHausa: "
+include: flores
+task: flores_eng_Latn-hau_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..838990b364097652e9ba4ed68726147e4424d05e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "As a Igbo and English linguist, translate the following English sentences\
+  \ to Igbo \nEnglish: {{sentence_eng_Latn}} \nIgbo: "
+include: flores
+task: flores_eng_Latn-ibo_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kab_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kab_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16888ad8f28b64a7bb9715fdf8f193e18ce06072
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kab_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kab_Latn
+doc_to_target: sentence_kab_Latn
+doc_to_text: "As a Kabyle and English linguist, translate the following English sentences\
+  \ to Kabyle \nEnglish: {{sentence_eng_Latn}} \nKabyle: "
+include: flores
+task: flores_eng_Latn-kab_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kam_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kam_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d48c52d16017b2e1241afd154539148d3f0d0ae4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kam_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kam_Latn
+doc_to_target: sentence_kam_Latn
+doc_to_text: "As a Kamba and English linguist, translate the following English sentences\
+  \ to Kamba \nEnglish: {{sentence_eng_Latn}} \nKamba: "
+include: flores
+task: flores_eng_Latn-kam_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kbp_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kbp_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c992a28f7e168e3753e378631fa6ce716e7ee69e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kbp_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kbp_Latn
+doc_to_target: sentence_kbp_Latn
+doc_to_text: "As a Kabiyè and English linguist, translate the following English sentences\
+  \ to Kabiyè \nEnglish: {{sentence_eng_Latn}} \nKabiyè: "
+include: flores
+task: flores_eng_Latn-kbp_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kea_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kea_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8ce1b502edea3c9f00fe89dbf9dc382010e4bff
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kea_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kea_Latn
+doc_to_target: sentence_kea_Latn
+doc_to_text: "As a Kabuverdianu and English linguist, translate the following English\
+  \ sentences to Kabuverdianu \nEnglish: {{sentence_eng_Latn}} \nKabuverdianu: "
+include: flores
+task: flores_eng_Latn-kea_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kik_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kik_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc7975c2b23bb486ead2962f28064a5fcab6102f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kik_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kik_Latn
+doc_to_target: sentence_kik_Latn
+doc_to_text: "As a Kikuyu and English linguist, translate the following English sentences\
+  \ to Kikuyu \nEnglish: {{sentence_eng_Latn}} \nKikuyu: "
+include: flores
+task: flores_eng_Latn-kik_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e2b91d461378cb7c8ff098d237037eefdcacc03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "As a Kinyarwanda and English linguist, translate the following English\
+  \ sentences to Kinyarwanda \nEnglish: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: flores
+task: flores_eng_Latn-kin_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kmb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kmb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..270f29b629e6f1f06da31ba154d977b0281fd63b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kmb_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kmb_Latn
+doc_to_target: sentence_kmb_Latn
+doc_to_text: "As a Kimbundu and English linguist, translate the following English\
+  \ sentences to Kimbundu \nEnglish: {{sentence_eng_Latn}} \nKimbundu: "
+include: flores
+task: flores_eng_Latn-kmb_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Arab.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd2994d36152fc1dcb4a7a2561cc41982dd6fed1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Arab
+doc_to_target: sentence_knc_Arab
+doc_to_text: "As a Central Kanuri (Arabic script) and English linguist, translate\
+  \ the following English sentences to Central Kanuri (Arabic script) \nEnglish: {{sentence_eng_Latn}}\
+  \ \nCentral Kanuri (Arabic script): "
+include: flores
+task: flores_eng_Latn-knc_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..262d0c1f3b8efc51c35e7154f27a9a4e6ed1405f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-knc_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-knc_Latn
+doc_to_target: sentence_knc_Latn
+doc_to_text: "As a Central Kanuri (Latin script) and English linguist, translate the\
+  \ following English sentences to Central Kanuri (Latin script) \nEnglish: {{sentence_eng_Latn}}\
+  \ \nCentral Kanuri (Latin script): "
+include: flores
+task: flores_eng_Latn-knc_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kon_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kon_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae9e1201808061f32c0e9d9260b8d2900f7bd7d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-kon_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-kon_Latn
+doc_to_target: sentence_kon_Latn
+doc_to_text: "As a Kikongo and English linguist, translate the following English sentences\
+  \ to Kikongo \nEnglish: {{sentence_eng_Latn}} \nKikongo: "
+include: flores
+task: flores_eng_Latn-kon_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lin_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0945c697c27b39ed91cff296dd162735f0629f4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lin_Latn
+doc_to_target: sentence_lin_Latn
+doc_to_text: "As a Lingala and English linguist, translate the following English sentences\
+  \ to Lingala \nEnglish: {{sentence_eng_Latn}} \nLingala: "
+include: flores
+task: flores_eng_Latn-lin_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lua_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lua_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff92a2cf381a3a4c94a5543901be39a647d24eb7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lua_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lua_Latn
+doc_to_target: sentence_lua_Latn
+doc_to_text: "As a Luba-Kasai and English linguist, translate the following English\
+  \ sentences to Luba-Kasai \nEnglish: {{sentence_eng_Latn}} \nLuba-Kasai: "
+include: flores
+task: flores_eng_Latn-lua_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lug_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lug_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4dfc626b9fdbcde3de0383b5d512365137da00b5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-lug_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-lug_Latn
+doc_to_target: sentence_lug_Latn
+doc_to_text: "As a Luganda and English linguist, translate the following English sentences\
+  \ to Luganda \nEnglish: {{sentence_eng_Latn}} \nLuganda: "
+include: flores
+task: flores_eng_Latn-lug_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-luo_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-luo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..803ed75d8b732c81f859285f974cd216afb86784
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-luo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-luo_Latn
+doc_to_target: sentence_luo_Latn
+doc_to_text: "As a Luo and English linguist, translate the following English sentences\
+  \ to Luo \nEnglish: {{sentence_eng_Latn}} \nLuo: "
+include: flores
+task: flores_eng_Latn-luo_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-mos_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-mos_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e959db1653eff6ca0054ec5032144a96c2c5713
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-mos_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-mos_Latn
+doc_to_target: sentence_mos_Latn
+doc_to_text: "As a Mossi and English linguist, translate the following English sentences\
+  \ to Mossi \nEnglish: {{sentence_eng_Latn}} \nMossi: "
+include: flores
+task: flores_eng_Latn-mos_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44839d82cac5af78f74b2f382d36cbd74f93baa6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "As a Northern Sotho and English linguist, translate the following English\
+  \ sentences to Northern Sotho \nEnglish: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: flores
+task: flores_eng_Latn-nso_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nus_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nus_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..387e4341f0761727d3e07a8748291aac574c727f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nus_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nus_Latn
+doc_to_target: sentence_nus_Latn
+doc_to_text: "As a Nuer and English linguist, translate the following English sentences\
+  \ to Nuer \nEnglish: {{sentence_eng_Latn}} \nNuer: "
+include: flores
+task: flores_eng_Latn-nus_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9311e264e1f7e617a22c52e7ac969b1001f7c5e7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "As a Nyanja and English linguist, translate the following English sentences\
+  \ to Nyanja \nEnglish: {{sentence_eng_Latn}} \nNyanja: "
+include: flores
+task: flores_eng_Latn-nya_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-plt_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-plt_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afc81158cbed0e268746ee51c1d4e1071f4315e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-plt_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-plt_Latn
+doc_to_target: sentence_plt_Latn
+doc_to_text: "As a Plateau Malagasy and English linguist, translate the following\
+  \ English sentences to Plateau Malagasy \nEnglish: {{sentence_eng_Latn}} \nPlateau\
+  \ Malagasy: "
+include: flores
+task: flores_eng_Latn-plt_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-run_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-run_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..519700cd32de76f039a6f1d3ce16a4c539278334
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-run_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-run_Latn
+doc_to_target: sentence_run_Latn
+doc_to_text: "As a Rundi and English linguist, translate the following English sentences\
+  \ to Rundi \nEnglish: {{sentence_eng_Latn}} \nRundi: "
+include: flores
+task: flores_eng_Latn-run_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sag_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sag_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa99b16137861e1e9fb4f19669dbb71977fd3cc1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sag_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sag_Latn
+doc_to_target: sentence_sag_Latn
+doc_to_text: "As a Sango and English linguist, translate the following English sentences\
+  \ to Sango \nEnglish: {{sentence_eng_Latn}} \nSango: "
+include: flores
+task: flores_eng_Latn-sag_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd7ac49ac5854133223a599569981f4c27d19a21
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "As a Shona and English linguist, translate the following English sentences\
+  \ to Shona \nEnglish: {{sentence_eng_Latn}} \nShona: "
+include: flores
+task: flores_eng_Latn-sna_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..17870addf00ed2c4f0c4e126fc094a06aabc8027
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-som_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "As a Somali and English linguist, translate the following English sentences\
+  \ to Somali \nEnglish: {{sentence_eng_Latn}} \nSomali: "
+include: flores
+task: flores_eng_Latn-som_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sot_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sot_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a45cf383057f37f504f799d7cb241ec61274fd83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sot_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sot_Latn
+doc_to_target: sentence_sot_Latn
+doc_to_text: "As a Southern Sotho and English linguist, translate the following English\
+  \ sentences to Southern Sotho \nEnglish: {{sentence_eng_Latn}} \nSouthern Sotho: "
+include: flores
+task: flores_eng_Latn-sot_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dbd162772b8938aef4f05da00ac3da0ce3be530
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "As a Swati and English linguist, translate the following English sentences\
+  \ to Swati \nEnglish: {{sentence_eng_Latn}} \nSwati: "
+include: flores
+task: flores_eng_Latn-ssw_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sun_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sun_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f8f6339450e8af7318e570290370a21037ba98d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-sun_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-sun_Latn
+doc_to_target: sentence_sun_Latn
+doc_to_text: "As a Sundanese and English linguist, translate the following English\
+  \ sentences to Sundanese \nEnglish: {{sentence_eng_Latn}} \nSundanese: "
+include: flores
+task: flores_eng_Latn-sun_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-swh_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-swh_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20971c5383cbcce97b3743262adc35c9d2dfadcf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-swh_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-swh_Latn
+doc_to_target: sentence_swh_Latn
+doc_to_text: "As a Swahili and English linguist, translate the following English sentences\
+  \ to Swahili \nEnglish: {{sentence_eng_Latn}} \nSwahili: "
+include: flores
+task: flores_eng_Latn-swh_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdb06f77b78b9ca296eb960fc62a63a94e990fd8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Latn
+doc_to_target: sentence_taq_Latn
+doc_to_text: "As a Tamasheq and English linguist, translate the following English\
+  \ sentences to Tamasheq \nEnglish: {{sentence_eng_Latn}} \nTamasheq: "
+include: flores
+task: flores_eng_Latn-taq_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d690651ddb21eaf3022475e98dc8f5ddf72f073b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-taq_Tfng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-taq_Tfng
+doc_to_target: sentence_taq_Tfng
+doc_to_text: "As a Tamasheq (Tifinagh script) and English linguist, translate the\
+  \ following English sentences to Tamasheq (Tifinagh script) \nEnglish: {{sentence_eng_Latn}}\
+  \ \nTamasheq (Tifinagh script): "
+include: flores
+task: flores_eng_Latn-taq_Tfng_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6b3ba347ab935d9c87a48262a9cafb259985ea0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "As a Tigrinya and English linguist, translate the following English\
+  \ sentences to Tigrinya \nEnglish: {{sentence_eng_Latn}} \nTigrinya: "
+include: flores
+task: flores_eng_Latn-tir_Ethi_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..845626f5f347436a0fcd2c04fe3446cb806da44e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "As a Setswana and English linguist, translate the following English\
+  \ sentences to Setswana \nEnglish: {{sentence_eng_Latn}} \nSetswana: "
+include: flores
+task: flores_eng_Latn-tsn_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tso_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..958411f89ea39c77cc329ce5f14795761ec1f1a9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tso_Latn
+doc_to_target: sentence_tso_Latn
+doc_to_text: "As a Tsonga and English linguist, translate the following English sentences\
+  \ to Tsonga \nEnglish: {{sentence_eng_Latn}} \nTsonga: "
+include: flores
+task: flores_eng_Latn-tso_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tum_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tum_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95e6efa7dbdd8ea987bd680954e69e97742c452d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tum_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tum_Latn
+doc_to_target: sentence_tum_Latn
+doc_to_text: "As a Tumbuka and English linguist, translate the following English sentences\
+  \ to Tumbuka \nEnglish: {{sentence_eng_Latn}} \nTumbuka: "
+include: flores
+task: flores_eng_Latn-tum_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-twi_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-twi_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dcb20543ccff51461bc2af582fcdfaa5855d25f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-twi_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-twi_Latn
+doc_to_target: sentence_twi_Latn
+doc_to_text: "As a Twi and English linguist, translate the following English sentences\
+  \ to Twi \nEnglish: {{sentence_eng_Latn}} \nTwi: "
+include: flores
+task: flores_eng_Latn-twi_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tzm_Tfng.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tzm_Tfng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..887344c67c0fe6cd1658c8172b55a252bfc9f12b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-tzm_Tfng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn-tzm_Tfng
+doc_to_target: sentence_tzm_Tfng
+doc_to_text: "As a Central Atlas Tamazight and English linguist, translate the following\
+  \ English sentences to Central Atlas Tamazight \nEnglish: {{sentence_eng_Latn}}\
+  \ \nCentral Atlas Tamazight: "
+include: flores
+task: flores_eng_Latn-tzm_Tfng_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-umb_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-umb_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8c4adc0139f6f2d9ce4fcd910e3f558b96df78d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-umb_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-umb_Latn
+doc_to_target: sentence_umb_Latn
+doc_to_text: "As a Umbundu and English linguist, translate the following English sentences\
+  \ to Umbundu \nEnglish: {{sentence_eng_Latn}} \nUmbundu: "
+include: flores
+task: flores_eng_Latn-umb_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66ad25794e67d54123f2a63ffad5e98d12c6ce59
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "As a Wolof and English linguist, translate the following English sentences\
+  \ to Wolof \nEnglish: {{sentence_eng_Latn}} \nWolof: "
+include: flores
+task: flores_eng_Latn-wol_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8cd2fe08ec7bcd000182a7a9f080e739b6d82289
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "As a Xhosa and English linguist, translate the following English sentences\
+  \ to Xhosa \nEnglish: {{sentence_eng_Latn}} \nXhosa: "
+include: flores
+task: flores_eng_Latn-xho_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09562458138acb1d146e5319816b01e2205351ce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "As a Yoruba and English linguist, translate the following English sentences\
+  \ to Yoruba \nEnglish: {{sentence_eng_Latn}} \nYoruba: "
+include: flores
+task: flores_eng_Latn-yor_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..15b41952e52b3acfee85294c372ecb954f8b37cc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/english-african/flores_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn-zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "As a Zulu and English linguist, translate the following English sentences\
+  \ to Zulu \nEnglish: {{sentence_eng_Latn}} \nZulu: "
+include: flores
+task: flores_eng_Latn-zul_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/flores/prompt_3/flores b/lm_eval/tasks/afrobench/flores/prompt_3/flores
new file mode 100644
index 0000000000000000000000000000000000000000..74f9f33eb22662bec79709bd64d8d31f3fb8eae0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/flores/prompt_3/flores
@@ -0,0 +1,24 @@
+tag:  # Shared settings file; the per-language YAMLs reference a file of this name via `include: flores`.
+- flores_tasks
+- flores_afr-eng  # NOTE(review): tag reads afr-eng, while the sibling english-african tasks are eng -> African — confirm.
+dataset_path: facebook/flores
+dataset_kwargs: {trust_remote_code: True}  # NOTE(review): presumably needed for the dataset's loading script — confirm.
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev  # same split name as validation_split above
+test_split: devtest
+metric_list:  # both metrics are declared as higher-is-better
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"  # the only stop sequence listed is a newline
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/README.md b/lm_eval/tasks/afrobench/injongointent/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..641877cb7c01a5b19791b20c95a246753ddee75a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/README.md
@@ -0,0 +1,23 @@
+# Injongo Intent Detection
+
+## Paper
+Title: `INJONGO: A Multicultural Intent Detection and Slot-filling Dataset for 16 African Languages`
+
+Paper Link: https://arxiv.org/abs/2502.09814
+
+## Abstract
+>Slot-filling and intent detection are well-established tasks in Conversational AI. However, current large-scale benchmarks for these tasks often exclude evaluations of low-resource languages and rely on translations from English benchmarks, thereby predominantly reflecting Western-centric concepts. In this paper, we introduce Injongo -- a multicultural, open-source benchmark dataset for 16 African languages with utterances generated by native speakers across diverse domains, including banking, travel, home, and dining. Through extensive experiments, we benchmark the fine-tuning multilingual transformer models and the prompting large language models (LLMs), and show the advantage of leveraging African-cultural utterances over Western-centric utterances for improving cross-lingual transfer from the English language. Experimental results reveal that current LLMs struggle with the slot-filling task, with GPT-4o achieving an average performance of 26 F1-score. In contrast, intent detection performance is notably better, with an average accuracy of 70.6%, though it still falls behind the fine-tuning baselines. Compared to the English language, GPT-4o and fine-tuning baselines perform similarly on intent detection, achieving an accuracy of approximately 81%. Our findings suggest that the performance of LLMs is still behind for many low-resource African languages, and more work is needed to further improve their downstream performance.
+
+### Citation
+
+```
+@misc{yu2025injongomulticulturalintentdetection,
+      title={INJONGO: A Multicultural Intent Detection and Slot-filling Dataset for 16 African Languages},
+      author={Hao Yu and Jesujoba O. Alabi and Andiswa Bukula and Jian Yun Zhuang and En-Shiun Annie Lee and Tadesse Kebede Guge and Israel Abebe Azime and Happy Buzaaba and Blessing Kudzaishe Sibanda and Godson K. Kalipe and Jonathan Mukiibi and Salomon Kabongo Kabenamualu and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Juliet W. Murage and Dietrich Klakow and David Ifeoluwa Adelani},
+      year={2025},
+      eprint={2502.09814},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2502.09814},
+}
+```
diff --git a/lm_eval/tasks/afrobench/injongointent/gen_utils.py b/lm_eval/tasks/afrobench/injongointent/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..112041999df20d26a31becc30633720a16457b18
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/gen_utils.py
@@ -0,0 +1,159 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # Bare holder for a single value; nothing else in gen_utils.py references this class.
+    def __init__(self, value):  # keeps `value` exactly as passed in (no validation, no copy)
+        self.value = value
+
+
+def prompt_func(mode, lang, intent):
+    """Build the prompt for `mode` (KeyError if unknown), naming `lang` and listing `intent`; `{{text}}` stays literal."""
+    options = f"[{', '.join(intent)}]"  # bracketed, comma-separated label list shared by every prompt
+    return {
+        "prompt_1": "Given the text: '{{text}}', determine the correct intent from the following list: "
+        f"{options}. Only output one intent from the list.",
+        "prompt_2": "Analyze the text: '{{text}}'. Choose the most appropriate intent from these options: "
+        f"{options}. Respond with only the selected intent.",
+        "prompt_3": "You are a linguistic analyst trained to understand user intent. Based on the text: '{{text}}', "
+        f"choose the intent that best matches from this list: {options}. Return only the intent.",
+        "prompt_4": f"You are a {lang} linguistic analyst trained to understand {lang} user intent. Based on the {lang} "
+        f"text: '{{{{text}}}}', choose the intent that best matches from this list: {options}. Return only the intent.",
+        "prompt_5": f"The following text is in {lang}: '{{{{text}}}}'. Given the list of intents: {options}, "
+        "identify the intent expressed in the text. Return only the identified intent.",
+    }[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language under `{output_dir}/{mode}`.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key passed to `prompt_func`; also the sub-directory name and task-name suffix.
+    :raises FileExistsError: After every language was attempted, if a file existed and `overwrite` is false.
+    """
+    err = []
+    languages = {
+        "amh": "Amharic",
+        "ewe": "Ewe",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kin": "Kinyarwanda",
+        "lin": "Lingala",
+        "lug": "Luganda",
+        "orm": "Oromo",
+        "sna": "Shona",
+        "sot": "Sotho",
+        "swa": "Swahili",
+        "twi": "Twi",
+        "wol": "Wolof",
+        "xho": "Xhosa",
+        "yor": "Yoruba",
+        "zul": "Zulu",
+        "eng": "English",
+    }
+
+    intents = [
+        "alarm",
+        "balance",
+        "bill_balance",
+        "book_flight",
+        "book_hotel",
+        "calendar_update",
+        "cancel_reservation",
+        "car_rental",
+        "confirm_reservation",
+        "cook_time",
+        "exchange_rate",
+        "food_last",
+        "freeze_account",
+        "ingredients_list",
+        "interest_rate",
+        "international_visa",
+        "make_call",
+        "meal_suggestion",
+        "min_payment",
+        "pay_bill",
+        "pin_change",
+        "play_music",
+        "plug_type",
+        "recipe",
+        "restaurant_reservation",
+        "restaurant_reviews",
+        "restaurant_suggestion",
+        "share_location",
+        "shopping_list_update",
+        "spending_history",
+        "text",
+        "time",
+        "timezone",
+        "transactions",
+        "transfer",
+        "translate",
+        "travel_notification",
+        "travel_suggestion",
+        "update_playlist",
+        "weather",
+    ]
+
+    # The checked-in English configs point validation/few-shot at `train`; emit the
+    # same overrides here so regenerating does not silently drop them.
+    eng_splits = {"validation_split": "train", "test_split": "test", "fewshot_split": "train"}
+    os.makedirs(f"{output_dir}/{mode}", exist_ok=True)  # loop-invariant, so create it once
+    for lang, lang_name in languages.items():
+        file_name = f"injongointent_{lang}.yaml"
+        yaml_details = {
+            "include": "injongointent",
+            "task": f"injongointent_{lang}_{mode}",
+            "dataset_name": lang,
+            "doc_to_text": prompt_func(mode, lang_name, intents),
+            **(eng_splits if lang == "eng" else {}),
+        }
+        try:
+            # Mode "x" makes open() raise FileExistsError instead of clobbering the file.
+            with open(
+                f"{output_dir}/{mode}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+            ) as f:
+                # Header string kept verbatim so regenerated files match the checked-in ones.
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    # `store_true` with `default=True` is always True, so `--no-overwrite` is the only way to switch it off.
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--no-overwrite", dest="overwrite", action="store_false", help="Fail if a file already exists"
+    )
+    parser.add_argument("--output-dir", default="./", help="Directory to write yaml files to")
+    parser.add_argument(
+        "--mode",
+        default="prompt_3",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/injongointent/injongointent.yaml b/lm_eval/tasks/afrobench/injongointent/injongointent.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..220f4c514f0afb8ec9105d54c90f834b4fd57780
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/injongointent.yaml
@@ -0,0 +1,13 @@
+group: injongointent
+task:
+  - injongointent_prompt_1
+  - injongointent_prompt_2
+  - injongointent_prompt_3
+  - injongointent_prompt_4
+  - injongointent_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent
new file mode 100644
index 0000000000000000000000000000000000000000..a77bc5c95941392779b960df6ad26ebebe5ba96d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent
@@ -0,0 +1,75 @@
+tag:
+- injongointent_tasks
+- injongointent_prompt_1
+dataset_path: masakhane/InjongoIntent
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: intent
+doc_to_choice:
+  - alarm
+  - balance
+  - bill_balance
+  - book_flight
+  - book_hotel
+  - calendar_update
+  - cancel_reservation
+  - car_rental
+  - confirm_reservation
+  - cook_time
+  - exchange_rate
+  - food_last
+  - freeze_account
+  - ingredients_list
+  - interest_rate
+  - international_visa
+  - make_call
+  - meal_suggestion
+  - min_payment
+  - pay_bill
+  - pin_change
+  - play_music
+  - plug_type
+  - recipe
+  - restaurant_reservation
+  - restaurant_reviews
+  - restaurant_suggestion
+  - share_location
+  - shopping_list_update
+  - spending_history
+  - text
+  - time
+  - timezone
+  - transactions
+  - transfer
+  - translate
+  - travel_notification
+  - travel_suggestion
+  - update_playlist
+  - weather
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_amh.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b3a3ee270683d5cc57a6e6ce81a3fe971f6c04e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_amh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_eng.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..240c37d5f1cd4197314c51532ab25ab2e915e2ad
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_eng.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: eng
+validation_split: train
+test_split: test
+fewshot_split: train
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ewe.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c08d8bb0c151a812b1cd6d5131e4a0d1664f8725
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_ewe_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_hau.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e1338c72cda04bbbbca713f83dd6aa715f19014
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ibo.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e4a956d23c414332d353f9c7d04ac3ce876831ed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_kin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f55d787a3f4952afcbb3f70d9432bfc7dbf0a84e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cc08df484bf58f9eaf2d498074eb1ac5dc72338
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_lin_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lug.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1a421577bbe8ff9ce34b00da45fdb3efcb22e9d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_orm.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b95a4e7b9cb3afe8c745cbd62a94c3fad6a5314
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_orm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_orm_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sna.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cbf0105abe8b3157df4c6898e3873fb25beba28
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sot.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad3b4497f8dbbb678048dc9dfa8aa8894dd241fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_sot.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_sot_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_swa.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc3d797c05c018baf599dcee28bccc0dd5c5ab72
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_twi.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..73fc61c7e6ab11fbf71d8842818f020f147b5443
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_wol.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d359d2f8e4cb91028705cc8c76842c9b5e78c3c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_xho.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4d9c173aac2832665724358697d59d3bf8f38e56
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_yor.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..682e01c12972c8b9a98e53711b307ebbf62676fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_zul.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d38a78141e1912ff995f6d87c0753f317ec6ad0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/injongointent_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Given the text: ''{{text}}'', determine the correct intent from the
+  following list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Only
+  output one intent from the list.'
+include: injongointent
+task: injongointent_zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_1/utils.py b/lm_eval/tasks/afrobench/injongointent/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent
new file mode 100644
index 0000000000000000000000000000000000000000..dfcb82678a61524c08bfd2d7e2d2ec0a50330f27
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent
@@ -0,0 +1,75 @@
+tag:
+- injongointent_tasks
+- injongointent_prompt_2
+dataset_path: masakhane/InjongoIntent
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: intent
+doc_to_choice:
+  - alarm
+  - balance
+  - bill_balance
+  - book_flight
+  - book_hotel
+  - calendar_update
+  - cancel_reservation
+  - car_rental
+  - confirm_reservation
+  - cook_time
+  - exchange_rate
+  - food_last
+  - freeze_account
+  - ingredients_list
+  - interest_rate
+  - international_visa
+  - make_call
+  - meal_suggestion
+  - min_payment
+  - pay_bill
+  - pin_change
+  - play_music
+  - plug_type
+  - recipe
+  - restaurant_reservation
+  - restaurant_reviews
+  - restaurant_suggestion
+  - share_location
+  - shopping_list_update
+  - spending_history
+  - text
+  - time
+  - timezone
+  - transactions
+  - transfer
+  - translate
+  - travel_notification
+  - travel_suggestion
+  - update_playlist
+  - weather
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_amh.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c1b2189ac768bef8c6263ccc41a003cd00ee6d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_amh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_eng.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc03705d5868b6c67b9973937f4bba59f08bd8c7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_eng.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: eng
+validation_split: train
+test_split: test
+fewshot_split: train
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ewe.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58eb914472609d29836edb92f5c676deac50166a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_ewe_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_hau.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7745369ac6af719ea71e9f4e3032bcc7b66a8ac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ibo.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b47052d71829c170849a07957a0412b8f21bddb2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_kin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..457935674bd84bb39e96d8ba000497430963f983
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54e7fcb71bb91ce4272dbd4723b7e2059da71c12
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_lin_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lug.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..96aa42fc9e2bed859ca386091267ca48b9566399
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_orm.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..872f96c542cd697bef3b0f92294e9247cedb458a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_orm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_orm_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sna.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a62dfe34a4611319a56bc06f7bb01c447ac7ad6f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sot.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9ca6a5675529c7bb3701d332c20eb1c2af19d53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_sot.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_sot_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_swa.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..339f66ac8d569245f6e8cab2f3456fcea24713d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_twi.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b758bce0493c722c0d995cdbf8cc5e4b409e4af9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_wol.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b573a444b22b2e34ed10ee9f75a914f16177bfc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_xho.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2c02205fdb7b9701096089c26d6813fff802dd7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_yor.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c821736809b4a37d94dedc82e6943594585ce35
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_zul.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8a541b66bfb14f692c1230871f2482452eb7347
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/injongointent_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'Analyze the text: ''{{text}}''. Choose the most appropriate intent from
+  these options: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Respond
+  with only the selected intent.'
+include: injongointent
+task: injongointent_zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_2/utils.py b/lm_eval/tasks/afrobench/injongointent/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent
new file mode 100644
index 0000000000000000000000000000000000000000..afdf43cfc10b75238debbd5dbab36ac493872025
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent
@@ -0,0 +1,75 @@
+tag:
+- injongointent_tasks
+- injongointent_prompt_3
+dataset_path: masakhane/InjongoIntent
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: intent
+doc_to_choice:
+  - alarm
+  - balance
+  - bill_balance
+  - book_flight
+  - book_hotel
+  - calendar_update
+  - cancel_reservation
+  - car_rental
+  - confirm_reservation
+  - cook_time
+  - exchange_rate
+  - food_last
+  - freeze_account
+  - ingredients_list
+  - interest_rate
+  - international_visa
+  - make_call
+  - meal_suggestion
+  - min_payment
+  - pay_bill
+  - pin_change
+  - play_music
+  - plug_type
+  - recipe
+  - restaurant_reservation
+  - restaurant_reviews
+  - restaurant_suggestion
+  - share_location
+  - shopping_list_update
+  - spending_history
+  - text
+  - time
+  - timezone
+  - transactions
+  - transfer
+  - translate
+  - travel_notification
+  - travel_suggestion
+  - update_playlist
+  - weather
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_amh.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7bd62c5b00eedcfb1cd254ae679426955cacb8a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_amh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_amh_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_eng.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..258f0cfab4a4dc9ee8f7f8baceb2c2bac35c0c02
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_eng.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: eng
+validation_split: train
+test_split: test
+fewshot_split: train
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ewe.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12688cc9d97dcacc50419a91707ac2f5fa47363b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_ewe_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_hau.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8414a09bae932411fd51cfbcf9d5efbbb2d93a45
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ibo.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f254438388e95b551fd9344758851b0a9fc8768
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_kin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b946cf004f775642ad8f0b9d5995820b97187f0a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a485d5ce807f0f9200ccdf535d5427697561bbd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_lin_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lug.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..71376ec3f8a3ae742cc694ea0d19c57ad9187b0b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_orm.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..706f3a908a221dd3bc876adae4ddc40b6b4cb6ea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_orm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_orm_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sna.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4aca73782fbcc8516983e588271bb336f674f2b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sot.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57e27afab1edb9509d4f9901bea7fc114f249a09
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_sot.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_sot_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_swa.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6cb4886d1f1d9ba9c87cbc78a436dad83d72487e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_twi.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8623bf33bcc89d2b25390b487617159263327f6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_twi_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_wol.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afc3cf4a907143eb7a13dca4c4783a0c715e1524
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_xho.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f41aa561fbd6a6da39e9f90d93c4fa909ffbf72
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_yor.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a5d5686de20ff32a1a4207ea2711d2176464df5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_zul.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f857ff065b13b0eb050107b884a7b57f795dc779
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/injongointent_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'You are a linguistic analyst trained to understand user intent. Based
+  on the text: ''{{text}}'', choose the intent that best matches from this list: [alarm,
+  balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_3/utils.py b/lm_eval/tasks/afrobench/injongointent/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent
new file mode 100644
index 0000000000000000000000000000000000000000..5d5c05ae113bdcef17764decefc59f335ddb3ba3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent
@@ -0,0 +1,75 @@
+tag:
+- injongointent_tasks
+- injongointent_prompt_4
+dataset_path: masakhane/InjongoIntent
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: intent
+doc_to_choice:
+  - alarm
+  - balance
+  - bill_balance
+  - book_flight
+  - book_hotel
+  - calendar_update
+  - cancel_reservation
+  - car_rental
+  - confirm_reservation
+  - cook_time
+  - exchange_rate
+  - food_last
+  - freeze_account
+  - ingredients_list
+  - interest_rate
+  - international_visa
+  - make_call
+  - meal_suggestion
+  - min_payment
+  - pay_bill
+  - pin_change
+  - play_music
+  - plug_type
+  - recipe
+  - restaurant_reservation
+  - restaurant_reviews
+  - restaurant_suggestion
+  - share_location
+  - shopping_list_update
+  - spending_history
+  - text
+  - time
+  - timezone
+  - transactions
+  - transfer
+  - translate
+  - travel_notification
+  - travel_suggestion
+  - update_playlist
+  - weather
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_amh.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa14ee5b178f7577c036039b089678bcfa697a04
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_amh.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'You are an Amharic linguistic analyst trained to understand Amharic user
+  intent. Based on the Amharic text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_amh_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_eng.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..853e64965251e37e59efe72554557b2a378e358f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_eng.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: eng
+validation_split: train
+test_split: test
+fewshot_split: train
+doc_to_text: 'You are an English linguistic analyst trained to understand English user
+  intent. Based on the English text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_eng_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ewe.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f61a3db57d2bcf2af8cf30dc65ac08b896013fbe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'You are an Ewe linguistic analyst trained to understand Ewe user intent.
+  Based on the Ewe text: ''{{text}}'', choose the intent that best matches from this
+  list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_ewe_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_hau.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdef34cb847fbc8008ed496826bc2cb79361a546
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_hau.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'You are a Hausa linguistic analyst trained to understand Hausa user
+  intent. Based on the Hausa text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ibo.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23b59831ed97b56ed983d26871d155b1f72a176b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'You are an Igbo linguistic analyst trained to understand Igbo user intent.
+  Based on the Igbo text: ''{{text}}'', choose the intent that best matches from this
+  list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_kin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28f05aeb00423bb80f2f070763f3f22345ae4776
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_kin.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'You are a Kinyarwanda linguistic analyst trained to understand Kinyarwanda
+  user intent. Based on the Kinyarwanda text: ''{{text}}'', choose the intent that
+  best matches from this list: [alarm, balance, bill_balance, book_flight, book_hotel,
+  calendar_update, cancel_reservation, car_rental, confirm_reservation, cook_time,
+  exchange_rate, food_last, freeze_account, ingredients_list, interest_rate, international_visa,
+  make_call, meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type,
+  recipe, restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df991d89146112468bae83c7c5fe87eef307dc49
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lin.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'You are a Lingala linguistic analyst trained to understand Lingala user
+  intent. Based on the Lingala text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_lin_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lug.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1abb66edb31a6a76e987e665f91f630fe8d3416
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_lug.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'You are a Luganda linguistic analyst trained to understand Luganda user
+  intent. Based on the Luganda text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_lug_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_orm.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..195ff4a232e782d38bae93b33648535934ff07e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_orm.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'You are an Oromo linguistic analyst trained to understand Oromo user
+  intent. Based on the Oromo text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_orm_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sna.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23d066c3d8c8b41e7184f867ba492d58a8736d82
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sna.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'You are a Shona linguistic analyst trained to understand Shona user
+  intent. Based on the Shona text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_sna_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sot.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82102a21e2e7bb0a3d57a6fff4dd678b587d1d95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_sot.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'You are a Sotho linguistic analyst trained to understand Sotho user
+  intent. Based on the Sotho text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_sot_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_swa.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..031ffbb40ceba3233e5f396e38a523f2bca81ad9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_swa.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'You are a Swahili linguistic analyst trained to understand Swahili user
+  intent. Based on the Swahili text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_twi.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a569b3cec8f6858d041e9d0c0c876720558b808b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'You are a Twi linguistic analyst trained to understand Twi user intent.
+  Based on the Twi text: ''{{text}}'', choose the intent that best matches from this
+  list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_twi_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_wol.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a55398ab4161ec54e971863cd1b4bfb41330b093
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_wol.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'You are a Wolof linguistic analyst trained to understand Wolof user
+  intent. Based on the Wolof text: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_wol_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_xho.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d773a1756e17eb5cd21b3bc550b5847acaae671c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_xho.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'You are a Xhosa linguistic analyst trained to understand Xhosa user
+  intent. Based on the Xhosatext: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_xho_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_yor.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..af01d9f3e8efc1d57003194dd0201dd76bd76fbf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_yor.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'You are a Yoruba linguistic analyst trained to understand Yoruba user
+  intent. Based on the Yorubatext: ''{{text}}'', choose the intent that best matches
+  from this list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather]. Return
+  only the intent.'
+include: injongointent
+task: injongointent_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_zul.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b6e5aace303095eccb19b8a011559555cee7eb2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/injongointent_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'You are a Zulu linguistic analyst trained to understand Zulu user intent.
+  Based on the Zulutext: ''{{text}}'', choose the intent that best matches from this
+  list: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather]. Return only the intent.'
+include: injongointent
+task: injongointent_zul_prompt_4
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_4/utils.py b/lm_eval/tasks/afrobench/injongointent/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent
new file mode 100644
index 0000000000000000000000000000000000000000..0012857bdaa787ad8bf9ba345330844c8b266a8e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent
@@ -0,0 +1,75 @@
+tag:
+- injongointent_tasks
+- injongointent_prompt_5
+dataset_path: masakhane/InjongoIntent
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: intent
+doc_to_choice:
+  - alarm
+  - balance
+  - bill_balance
+  - book_flight
+  - book_hotel
+  - calendar_update
+  - cancel_reservation
+  - car_rental
+  - confirm_reservation
+  - cook_time
+  - exchange_rate
+  - food_last
+  - freeze_account
+  - ingredients_list
+  - interest_rate
+  - international_visa
+  - make_call
+  - meal_suggestion
+  - min_payment
+  - pay_bill
+  - pin_change
+  - play_music
+  - plug_type
+  - recipe
+  - restaurant_reservation
+  - restaurant_reviews
+  - restaurant_suggestion
+  - share_location
+  - shopping_list_update
+  - spending_history
+  - text
+  - time
+  - timezone
+  - transactions
+  - transfer
+  - translate
+  - travel_notification
+  - travel_suggestion
+  - update_playlist
+  - weather
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_amh.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a6623a9387a98d2d21791f51d0a8690b756c9a4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_amh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'The following text is in Amharic: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_amh_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_eng.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4dcbebbbd00cf8a3abb7f5e848d3a6b8520d46a3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_eng.yaml
@@ -0,0 +1,16 @@
+# Generated by utils.py
+dataset_name: eng
+validation_split: train
+test_split: test
+fewshot_split: train
+doc_to_text: 'The following text is in English: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_eng_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ewe.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cab84252dcbfdc4d1eefcd2f4aa36b031edd7ba
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: 'The following text is in Ewe: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_ewe_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_hau.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b6275db8383eddada828f7cb1963d554b4f8d658
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'The following text is in Hausa: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ibo.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..518ec898411de5d141ac16137578e710fc7bb60e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'The following text is in Igbo: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_kin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..348535c679af3c08da162c812fbfd700557e8326
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: 'The following text is in Kinyarwanda: ''{{text}}''. Given the list of
+  intents: [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update,
+  cancel_reservation, car_rental, confirm_reservation, cook_time, exchange_rate, food_last,
+  freeze_account, ingredients_list, interest_rate, international_visa, make_call,
+  meal_suggestion, min_payment, pay_bill, pin_change, play_music, plug_type, recipe,
+  restaurant_reservation, restaurant_reviews, restaurant_suggestion, share_location,
+  shopping_list_update, spending_history, text, time, timezone, transactions, transfer,
+  translate, travel_notification, travel_suggestion, update_playlist, weather], identify
+  the intent expressed in the text. Return only the identified intent.'
+include: injongointent
+task: injongointent_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lin.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75bbf4ec5935505c554d5c7b58934188d1591532
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'The following text is in Lingala: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_lin_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lug.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..49b7f6faddd5ad59c079de1a1986e33ef4a29311
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'The following text is in Luganda: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_lug_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_orm.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72a7686934892b2f21f3098b302dc601050b394b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_orm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'The following text is in Oromo: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_orm_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sna.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8931b65ce15999ea1215cf420a671baed32a51c2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'The following text is in Shona: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_sna_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sot.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a8d0328e75acaead01f4e08fb4c6ed26eed6314
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_sot.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sot
+doc_to_text: 'The following text is in Sotho: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_sot_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_swa.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1da6be32a6c64ec6583c70a2f0cbaa3d6aa3d435
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'The following text is in Swahili: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_twi.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc78ae4f60d64d9d30a5bf52064fda54e0bce9ef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: 'The following text is in Twi: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_twi_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_wol.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c71483ec36660b62bc945b4ebf89fdb77034e85
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: 'The following text is in Wolof: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_wol_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_xho.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d8b543fc5cac462b31a9ae806aa3ce0f60b6ba9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'The following text is in Xhosa: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_xho_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_yor.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cbe285688f78a2ec210740e0294e4a3c6fde8dd7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'The following text is in Yoruba: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_zul.yaml b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ba384db5a8b2f7f17601ad7fff0bed8327ecc1c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/injongointent_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: 'The following text is in Zulu: ''{{text}}''. Given the list of intents:
+  [alarm, balance, bill_balance, book_flight, book_hotel, calendar_update, cancel_reservation,
+  car_rental, confirm_reservation, cook_time, exchange_rate, food_last, freeze_account,
+  ingredients_list, interest_rate, international_visa, make_call, meal_suggestion,
+  min_payment, pay_bill, pin_change, play_music, plug_type, recipe, restaurant_reservation,
+  restaurant_reviews, restaurant_suggestion, share_location, shopping_list_update,
+  spending_history, text, time, timezone, transactions, transfer, translate, travel_notification,
+  travel_suggestion, update_playlist, weather], identify the intent expressed in the
+  text. Return only the identified intent.'
+include: injongointent
+task: injongointent_zul_prompt_5
diff --git a/lm_eval/tasks/afrobench/injongointent/prompt_5/utils.py b/lm_eval/tasks/afrobench/injongointent/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/injongointent/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/mafand/README.md b/lm_eval/tasks/afrobench/mafand/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9e7eea17598d29defff07bb37c4f47efaa446547
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/README.md
@@ -0,0 +1,73 @@
+# MAFAND
+
+## Paper
+Title: `A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for African News Translation`
+
+Paper Link: https://aclanthology.org/2022.naacl-main.223/
+
+## Abstract
+>Recent advances in the pre-training of language models leverage large-scale datasets to create multilingual models. However, low-resource languages are mostly left out in these datasets. This is primarily because many widely spoken languages are not well represented on the web and therefore excluded from the large-scale crawls used to create datasets. Furthermore, downstream users of these models are restricted to the selection of languages originally chosen for pre-training. This work investigates how to optimally leverage existing pre-trained models to create low-resource translation systems for 16 African languages. We focus on two questions: 1) How can pre-trained models be used for languages not included in the initial pre-training? and 2) How can the resulting translation models effectively transfer to new domains? To answer these questions, we create a new African news corpus covering 16 languages, of which eight languages are not part of any existing evaluation dataset. We demonstrate that the most effective strategy for transferring both to additional languages and to additional domains is to fine-tune large pre-trained models on small quantities of high-quality translation data.
+
+HomePage: https://github.com/masakhane-io/lafand-mt
+
+### Citation
+
+```
+@inproceedings{adelani-etal-2022-thousand,
+    title = "A Few Thousand Translations Go a Long Way! Leveraging Pre-trained Models for {A}frican News Translation",
+    author = "Adelani, David  and
+      Alabi, Jesujoba  and
+      Fan, Angela  and
+      Kreutzer, Julia  and
+      Shen, Xiaoyu  and
+      Reid, Machel  and
+      Ruiter, Dana  and
+      Klakow, Dietrich  and
+      Nabende, Peter  and
+      Chang, Ernie  and
+      Gwadabe, Tajuddeen  and
+      Sackey, Freshia  and
+      Dossou, Bonaventure F. P.  and
+      Emezue, Chris  and
+      Leong, Colin  and
+      Beukman, Michael  and
+      Muhammad, Shamsuddeen  and
+      Jarso, Guyo  and
+      Yousuf, Oreen  and
+      Niyongabo Rubungo, Andre  and
+      Hacheme, Gilles  and
+      Wairagala, Eric Peter  and
+      Nasir, Muhammad Umair  and
+      Ajibade, Benjamin  and
+      Ajayi, Tunde  and
+      Gitau, Yvonne  and
+      Abbott, Jade  and
+      Ahmed, Mohamed  and
+      Ochieng, Millicent  and
+      Aremu, Anuoluwapo  and
+      Ogayo, Perez  and
+      Mukiibi, Jonathan  and
+      Ouoba Kabore, Fatoumata  and
+      Kalipe, Godson  and
+      Mbaye, Derguene  and
+      Tapo, Allahsera Auguste  and
+      Memdjokam Koagne, Victoire  and
+      Munkoh-Buabeng, Edwin  and
+      Wagner, Valencia  and
+      Abdulmumin, Idris  and
+      Awokoya, Ayodele  and
+      Buzaaba, Happy  and
+      Sibanda, Blessing  and
+      Bukula, Andiswa  and
+      Manthalu, Sam",
+    booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
+    month = jul,
+    year = "2022",
+    address = "Seattle, United States",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2022.naacl-main.223",
+    doi = "10.18653/v1/2022.naacl-main.223",
+    pages = "3053--3070",
+    abstract = "Recent advances in the pre-training for language models leverage large-scale datasets to create multilingual models. However, low-resource languages are mostly left out in these datasets. This is primarily because many widely spoken languages that are not well represented on the web and therefore excluded from the large-scale crawls for datasets. Furthermore, downstream users of these models are restricted to the selection of languages originally chosen for pre-training. This work investigates how to optimally leverage existing pre-trained models to create low-resource translation systems for 16 African languages. We focus on two questions: 1) How can pre-trained models be used for languages not included in the initial pretraining? and 2) How can the resulting translation models effectively transfer to new domains? To answer these questions, we create a novel African news corpus covering 16 languages, of which eight languages are not part of any existing evaluation dataset. We demonstrate that the most effective strategy for transferring both additional languages and additional domains is to leverage small quantities of high-quality translation data to fine-tune large pre-trained models.",
+}
+```
diff --git a/lm_eval/tasks/afrobench/mafand/gen_utils.py b/lm_eval/tasks/afrobench/mafand/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c260a321a419b3013545b738850af5f796a1bc32
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/gen_utils.py
@@ -0,0 +1,147 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # Minimal holder: keeps the constructor argument on `.value`.
+    def __init__(self, value):
+        self.value = value
+
+
+def prompt_func(mode, lang, lang_dict):
+    """Return the prompt template `mode` (with `{{...}}` placeholders) for language code `lang`.
+
+    `lang_dict[lang]` supplies the language name; a KeyError is raised for an unknown `lang` or `mode`.
+    """
+    name = lang_dict[lang]
+    src = f"{{{{{lang}_text}}}}"  # renders as the literal placeholder "{{<lang>_text}}"
+    # Fixed: prompt_1_reverse sat in an f-string, so its "{{eng_text}}" collapsed to "{eng_text}".
+    eng = "{{eng_text}}"
+    role = "You are an advanced Translator, a specialized assistant designed to translate documents from "
+    goal = "\nYour main goal is to ensure translations are grammatically correct and human-oriented. "
+    expert = "You are a translation expert. Translate the following "
+    prompt_map = {
+        "prompt_1": f"{role}{name} into English. {goal}\n{name}: {src} \nEnglish: ",
+        "prompt_1_reverse": f"{role}English into {name}. {goal}\nEnglish: {eng} \n{name}: ",
+        "prompt_2": f"{name} sentence: {src} \nEnglish sentence: ",
+        "prompt_2_reverse": f"English sentence: {eng} \n{name} sentence: ",
+        "prompt_3": f"{expert}{name} sentences to English \n{name} sentence: {src}\nEnglish sentence: ",
+        "prompt_3_reverse": f"{expert}English sentences to {name} \nEnglish sentence: {eng} \n{name} sentence: ",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse: bool) -> None:
+    """
+    Write one task yaml per MAFAND language under `<output_dir>/<mode>/<direction>/`.
+    Each file holds `include`, `task` and `dataset_name` below a generated-by comment.
+
+    :param output_dir: Root directory that receives the `<mode>/<direction>` sub-folders.
+    :param overwrite: Open files with "w" when true, otherwise with "x" (existing files fail).
+    :param mode: Prompt variant; used as a path component and as the task-name suffix.
+    :param reverse: True writes `african-english` files named `<lang>-en`/`<lang>-fr`;
+        False writes `english-african` files named `en-<lang>`/`fr-<lang>`.
+    :raises FileExistsError: After all languages were tried, naming every file that was skipped.
+    """
+    languages = {
+        "amh": "Amharic",
+        "bam": "Bambara",
+        "bbj": "Gbomala",
+        "ewe": "Ewe",
+        "fon": "Fon",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kin": "Kinyarwanda",
+        "lug": "Luganda",
+        "luo": "Luo",
+        "mos": "Mossi",
+        "nya": "Chichewa",
+        "pcm": "Nigerian Pidgin",
+        "sna": "Shona",
+        "swa": "Swahili",
+        "tsn": "Setswana",
+        "twi": "Twi",
+        "wol": "Wolof",
+        "xho": "Xhosa",
+        "yor": "Yoruba",
+        "zul": "Zulu",
+    }
+
+    # Languages whose file and dataset names pair them with "fr" instead of "en".
+    french_langs = ["bam", "bbj", "ewe", "fon", "wol", "mos"]
+
+    # Directory and open mode do not depend on the language, so name them once.
+    direction = "african-english" if reverse else "english-african"
+    target_dir = f"{output_dir}/{mode}/{direction}"
+    open_mode = "w" if overwrite else "x"
+    skipped = []
+
+    for code in languages:
+        pivot = "fr" if code in french_langs else "en"
+        # The dataset config is always `<pivot>-<code>`; only the file/task name flips.
+        config_name = f"{pivot}-{code}"
+        pair = f"{code}-{pivot}" if reverse else config_name
+        file_name = f"mafand_{pair}.yaml"
+        yaml_details = {
+            "include": "mafand",
+            "task": f"mafand_{pair}_{mode}",
+            "dataset_name": config_name,
+        }
+        try:
+            # Inside the try so a FileExistsError from makedirs is collected as well.
+            os.makedirs(target_dir, exist_ok=True)
+            with open(
+                f"{target_dir}/{file_name}", open_mode, encoding="utf8"
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            skipped.append(file_name)
+
+    # Report every collision together rather than stopping at the first one.
+    if skipped:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(skipped)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files.
+
+    Overwriting stays on by default (`--no-overwrite` disables it); `--reverse` takes true/false.
+    """
+    parser = argparse.ArgumentParser()
+    # With default=True a lone store_true flag could never be switched off, so pair it.
+    parser.add_argument(
+        "--overwrite", dest="overwrite", default=True, action="store_true",
+        help="Overwrite files if they already exist (default)",
+    )
+    parser.add_argument(
+        "--no-overwrite", dest="overwrite", action="store_false",
+        help="Raise FileExistsError instead of overwriting existing files",
+    )
+    parser.add_argument("--output-dir", default="./", help="Directory to write yaml files to")
+    parser.add_argument(
+        "--mode",
+        default="prompt_3",
+        choices=["prompt_1", "prompt_2", "prompt_3"],
+        help="Prompt number",
+    )
+    parser.add_argument(
+        "--reverse",
+        default=True,
+        # argparse hands over a string, which never equalled the bool choices before.
+        type=lambda s: {"true": True, "false": False}.get(s.lower(), s),
+        choices=[True, False],
+        help="Reverse the translation direction (true/false)",
+    )
+    args = parser.parse_args()
+    gen_lang_yamls(
+        output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode, reverse=args.reverse
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/mafand/mafand.yaml b/lm_eval/tasks/afrobench/mafand/mafand.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ef8619addf5df6be4d59e31540c6ed6663e6b189
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/mafand.yaml
@@ -0,0 +1,14 @@
+group: mafand
+task:
+  - mafand_eng-afr_prompt_1
+  - mafand_eng-afr_prompt_2
+  - mafand_eng-afr_prompt_3
+  - mafand_afr-eng_prompt_1
+  - mafand_afr-eng_prompt_2
+  - mafand_afr-eng_prompt_3
+aggregate_metric_list:
+  - metric: acc  # NOTE(review): the prompt_1 task templates report bleu/chrf, not acc — confirm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..4f2047be0877cfda1e41acc76e491478c1f8f8f0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_afr-eng
+- mafand_afr-eng_prompt_1
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target
+doc_to_text: !function utils.create_text_prompt_1
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_amh-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_amh-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95e87fd8aeb3df35fd529338e719683805a78f18
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_amh-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_amh-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bam-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bam-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dbc612ac327d46f46c4df459d558c8429d2089dd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bam-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_bam-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bbj-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bbj-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abe64f9b2a1dbf14bcf60f9f4f80df24f65821ed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_bbj-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_bbj-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ewe-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ewe-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ecd9b38bcdfda7354569bc002e3ed10ff573449f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ewe-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_ewe-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_fon-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_fon-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..705cfbb855261a2cc14d841bde89661fa5d6be75
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_fon-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fon-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_hau-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_hau-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b84d9cecabfd92350a9ab63585f38fcfff328d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_hau-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_hau-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ibo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ibo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d78c91bb1fc61797c9554b9cf8acb7c099e53919
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_ibo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_ibo-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_kin-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_kin-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..954c036e80de22ad705888d66705e58b2a15f689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_kin-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_kin-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_lug-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_lug-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..671c072adc79858fc9eb1f2c32b99420b26fcb95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_lug-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_lug-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_luo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_luo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a1d5965f08d62f2a8147a39878f844d714800ade
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_luo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_luo-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_mos-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_mos-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da085707bc8ec37730535867ad2faa80e23bfa20
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_mos-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_mos-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_nya-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_nya-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bc2426687b5784602b1bc1bcb64c453d79d39ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_nya-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_nya-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_pcm-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_pcm-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6fb5dee6d393fbe6a6172e347e382cee2aecfb5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_pcm-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_pcm-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_sna-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_sna-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..283517d6ac4f9f4c8e462a63002d11a681c438b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_sna-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_sna-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_swa-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_swa-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..476bba42d82461b241b2b8a396baefe20c154206
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_swa-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_swa-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_tsn-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_tsn-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a94c5b6ea0f870db19b4fc3c692be95a3fc6d455
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_tsn-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_tsn-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_twi-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_twi-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f5883b12d6875133699a8158dd2151c8c784981
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_twi-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_twi-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_wol-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_wol-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb887188000da8e83aeed287619ed1cd2375e18a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_wol-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_wol-fr_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_xho-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_xho-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0561b4157c15f0fa5f8f3876ab86d07087a5a0a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_xho-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_xho-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_yor-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_yor-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec97ae7d3c4abc88a4774dc69cb32e2d7dced32d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_yor-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_yor-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_zul-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_zul-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9649d772d259a5519e87ae522f80b3d591f1b2be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/mafand_zul-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_zul-en_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/african-english/utils.py
@@ -0,0 +1,121 @@
+languages = {  # code found as a doc["translation"] key -> language name inserted into prompts
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Gbomala",
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    """Return the "en" entry of `doc["translation"]`, or the "fr" entry when there is no "en" key."""
+    pair = doc["translation"]
+    if "en" in pair:
+        return pair["en"]
+    # No English entry: the reference is taken from the French side instead.
+    return pair["fr"]
+
+
+def get_target_reverse(doc):
+    """Return the text stored under the first `doc["translation"]` key other than "en"/"fr"."""
+    pair = doc["translation"]
+    others = [code for code in pair if code not in ("en", "fr")]
+    # Indexing (not next()) keeps the IndexError when only en/fr keys are present.
+    return pair[others[0]]
+
+
+def create_text_prompt_1(doc):
+    """Build the prompt-1 instruction for translating the non-English/French text into English/French."""
+    pair = doc["translation"]
+    src_code = [code for code in pair if code not in ("en", "fr")][0]
+    src_text, src_name = pair[src_code], languages[src_code]
+    tgt_name = "English" if "en" in pair else "French"
+    # Pieces appear in reading order: role, goal, source line, then the answer cue.
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{src_name} into {tgt_name}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{src_name}: {src_text} \n{tgt_name}: "
+    )
+
+
+def create_reverse_prompt_1(doc):
+    """Build the prompt-1 instruction for translating the English/French text into the other language."""
+    pair = doc["translation"]
+    tgt_code = [code for code in pair if code not in ("en", "fr")][0]
+    # English wins when present; otherwise the French entry is the source.
+    src_code, src_name = ("en", "English") if "en" in pair else ("fr", "French")
+    src_text = pair[src_code]
+    tgt_name = languages[tgt_code]
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{src_name} into {tgt_name}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{src_name}: {src_text} \n{tgt_name}: "
+    )
+
+
+def create_text_prompt_2(doc):
+    """Return the prompt-2 text: `<Language> sentence: <text>` followed by the English/French cue.
+
+    Fixed: a stray trailing comma used to wrap the prompt in a 1-tuple instead of returning a str.
+    """
+    pair = doc["translation"]
+    source_key = [key for key in pair if key not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    target_lang = "English" if "en" in pair else "French"
+    return f"{languages[source_key]} sentence: {source_sentence} \n{target_lang} sentence: "
+
+
+def create_reverse_prompt_2(doc):
+    """Return the prompt-2 text: `<English|French> sentence: <text>` plus the target-language cue.
+
+    Fixed: a stray trailing comma used to wrap the prompt in a 1-tuple instead of returning a str.
+    """
+    pair = doc["translation"]
+    target_lang = [key for key in pair if key not in ("en", "fr")][0]
+    source_key, source_lang = ("en", "English") if "en" in pair else ("fr", "French")
+    source_sentence = pair[source_key]
+    # The newline after the final cue is part of the existing template and is kept as-is.
+    return f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n"
+
+
+def create_text_prompt_3(doc):
+    """Build the prompt-3 "translation expert" instruction for the non-English/French text in `doc`."""
+    pair = doc["translation"]
+    src_code = [code for code in pair if code not in ("en", "fr")][0]
+    src_text = pair[src_code]
+    tgt_name = "English" if "en" in pair else "French"
+    src_name = languages[src_code]
+    return (
+        f"You are a translation expert. Translate the following {src_name} sentences "
+        f"to {tgt_name}. \n{src_name} sentence: {src_text}\n{tgt_name} sentence: "
+    )
+
+
+def create_reverse_prompt_3(doc):
+    """Build the prompt-3 "translation expert" instruction for the English/French text in `doc`."""
+    pair = doc["translation"]
+    tgt_code = [code for code in pair if code not in ("en", "fr")][0]
+    # English wins when present; otherwise the French entry is the source.
+    src_code, src_name = ("en", "English") if "en" in pair else ("fr", "French")
+    src_text = pair[src_code]
+    tgt_name = languages[tgt_code]
+    return (
+        f"You are a translation expert. Translate the following {src_name} sentence into {tgt_name}\n"
+        f"{src_name} sentence: {src_text}\n{tgt_name} sentence: "
+    )
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..1d004556267924e0120e95fb516fee72a5d3eb1d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_eng-afr
+- mafand_eng-afr_prompt_1
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target_reverse
+doc_to_text: !function utils.create_reverse_prompt_1
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-amh.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ef9ab288615c42f1c4c04538f0b51e3d2245ac1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_en-amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-hau.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea577781deda41a7217dbd757158789a80dafc6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_en-hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-ibo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..88af221fb31e583d9238c95c05dffb44306a617e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_en-ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-kin.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c415f511ad246eb99509f4bedb979810dfcf20d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_en-kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-lug.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..94070e8986b0f44eecb1097c673c7daf4aec8067
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_en-lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-luo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc6b15c67f2919dca92ef84d058a93da6e0271a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-luo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_en-luo_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-nya.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..225a46474ff2c694da98558bfaf665bb8e102248
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-nya.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_en-nya_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-pcm.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..69380c7182bdfc8f8e25c2ee16b3ced4a5c05d49
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-pcm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_en-pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-sna.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..634d988fd46f0a98b6e5b76e4a42f897edb4871d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_en-sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-swa.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bfbf259cbbe8d6d740dc7522f6d8bd6542c790a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_en-swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-tsn.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..faa99ddf5186515253279585aa1439569d7de9b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-tsn.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_en-tsn_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-twi.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9294975da5385d196db9abc9f22d087a2c9cce0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_en-twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-xho.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..244f5cabd9901020a136322842ec47fb181faed6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_en-xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-yor.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa3189779577ecbc58f80ff4973a63d81ea1c6a6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_en-yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-zul.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6afdc0c5648d8fec0be6d2d2fca0b9416dccffab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_en-zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_en-zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bam.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c21d96f275a14b81225b3979044a23909bfb023
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bam.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_fr-bam_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bbj.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76cf07507625677c85772e988ac66741508efbc6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-bbj.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_fr-bbj_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-ewe.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c7bd6671b7ba6da2a869cf12862046b632396e6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_fr-ewe_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-fon.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..737d68eba79405276a96151a33786387c33cc148
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-fon.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fr-fon_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-mos.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9186a5b9f6670f5187284981d547d91631cc94f6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-mos.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_fr-mos_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-wol.yaml b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6e29f5fb98a138f3a131c62b685abc25b60e3f69
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/mafand_fr-wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_fr-wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_1/english-african/utils.py
@@ -0,0 +1,121 @@
+languages = {  # language code (as found in doc["translation"] keys) -> name interpolated into prompts
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Gbomala",  # NOTE(review): "bbj" is usually written Ghomala' -- confirm; fixing it changes prompts
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    """Return the reference translation: the English side, else the French side."""
+    pair = doc["translation"]
+    if "en" in pair:
+        return pair["en"]
+    # No English side present, so fall back to the French side.
+    return pair["fr"]
+
+
+def get_target_reverse(doc):
+    """Return the sentence stored under the first key that is neither "en" nor "fr"."""
+    pair = doc["translation"]
+    # Indexing [0] keeps the IndexError when the pair has no such key.
+    other_keys = [code for code in pair if code not in ("en", "fr")]
+    return pair[other_keys[0]]
+
+
+def create_text_prompt_1(doc):
+    """Build the instruction-style prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{languages[source_key]} into {source_lang}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{languages[source_key]}: {source_sentence} \n{source_lang}: "
+    )
+
+
+def create_reverse_prompt_1(doc):
+    """Build the instruction-style prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    # English wins when present; otherwise the French side is used as the source.
+    source_key, source_lang = ("en", "English") if "en" in pair else ("fr", "French")
+    source_sentence = pair[source_key]
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{source_lang} into {languages[target_lang]}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{source_lang}: {source_sentence} \n{languages[target_lang]}: "
+    )
+
+
+def create_text_prompt_2(doc):
+    """Build the terse '<language> sentence:' prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    # Must be a plain str like the sibling builders; a trailing comma in parentheses would make a 1-tuple.
+    prompt = f"{languages[source_key]} sentence: {source_sentence} \n{source_lang} sentence: "
+    return prompt
+
+
+def create_reverse_prompt_2(doc):
+    """Build the terse '<language> sentence:' prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    source_key = "en" if "en" in pair else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = pair[source_key]
+    # Must be a plain str like the sibling builders; a trailing comma in parentheses would make a 1-tuple.
+    prompt = f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n"
+    return prompt
+
+
+def create_text_prompt_3(doc):
+    """Build the 'translation expert' prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    return (
+        f"You are a translation expert. Translate the following {languages[source_key]} sentences "
+        f"to {source_lang}. \n{languages[source_key]} sentence: {source_sentence}\n{source_lang} sentence: "
+    )
+
+
+def create_reverse_prompt_3(doc):
+    """Build the 'translation expert' prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    source_key = "en" if "en" in pair else "fr"
+    source_lang = "French" if source_key == "fr" else "English"
+    source_sentence = pair[source_key]
+    return (
+        f"You are a translation expert. Translate the following {source_lang} sentence into {languages[target_lang]}\n"
+        f"{source_lang} sentence: {source_sentence}\n{languages[target_lang]} sentence: "
+    )
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..eb7ad9883115b42626fd30d223fa90ff6f133384
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_afr-eng
+- mafand_afr-eng_prompt_2
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target
+doc_to_text: !function utils.create_text_prompt_2
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_amh-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_amh-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6db544cb5e17e87e6ebcf606ad59ad9a7435f338
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_amh-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_amh-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bam-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bam-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a9f3b3ac09958dc4253d982dfe1c872aefafd7e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bam-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_bam-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bbj-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bbj-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0b42b23787e88bd5ff0d000c1806655e26f65cf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_bbj-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_bbj-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ewe-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ewe-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..457c0d1945bfa65f5d5f0e1ccffc30c11aadd452
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ewe-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_ewe-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_fon-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_fon-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84263d5a9ca41877f02d680660706eaeba28fea2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_fon-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fon-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_hau-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_hau-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..05c31a4670a9300c1175dc2f7e109c024b146301
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_hau-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_hau-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ibo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ibo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3cb4a5b897c2d143ef31d667c9d87fc35f40caa0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_ibo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_ibo-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_kin-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_kin-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e3e1acf9a92a63ab80dbc0314adebfa59e449490
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_kin-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_kin-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_lug-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_lug-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb68279d783b71ff94742507d4ed1571a03b6b51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_lug-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_lug-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_luo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_luo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12f199473ee6e4ab1f6a2d3ae6e69eb04ba6a399
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_luo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_luo-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_mos-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_mos-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a723701d3250aa78f7631a3dcfc450301201bf73
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_mos-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_mos-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_nya-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_nya-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24569f00825bb4a2e0419e290ae4ff1bc7e0d312
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_nya-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_nya-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_pcm-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_pcm-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ec0c66a0a5c7fadbc7b9ec715fe53596d4c2b51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_pcm-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_pcm-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_sna-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_sna-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bf99b9955378aa6a5930d9783ed33dfea96ac95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_sna-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_sna-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_swa-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_swa-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb2ada0bba76e9c83a99d8060ac8f4146a1462c7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_swa-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_swa-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_tsn-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_tsn-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d16e7e94c5b251eff2adece451f89c7af71fbc30
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_tsn-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_tsn-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_twi-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_twi-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..267337c177fc06e777d62c3a2286084d79266a4c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_twi-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_twi-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_wol-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_wol-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6c67bd8d6df8d81d4d4088b6bbf8e04498afef6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_wol-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_wol-fr_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_xho-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_xho-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd1960d0efbd42ba25132c938944692fbf63b92f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_xho-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_xho-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_yor-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_yor-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb7241ad2cc2fc2f889bab56c4ad3e233a2d2165
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_yor-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_yor-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_zul-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_zul-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d44db7a2eeefcc7dc0218f08327ceaee4a6e351a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/mafand_zul-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_zul-en_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/african-english/utils.py
@@ -0,0 +1,121 @@
+languages = {  # language code (as found in doc["translation"] keys) -> name interpolated into prompts
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Gbomala",  # NOTE(review): "bbj" is usually written Ghomala' -- confirm; fixing it changes prompts
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    """Return the reference translation: the English side, else the French side."""
+    pair = doc["translation"]
+    if "en" in pair:
+        return pair["en"]
+    # No English side present, so fall back to the French side.
+    return pair["fr"]
+
+
+def get_target_reverse(doc):
+    """Return the sentence stored under the first key that is neither "en" nor "fr"."""
+    pair = doc["translation"]
+    # Indexing [0] keeps the IndexError when the pair has no such key.
+    other_keys = [code for code in pair if code not in ("en", "fr")]
+    return pair[other_keys[0]]
+
+
+def create_text_prompt_1(doc):
+    """Build the instruction-style prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{languages[source_key]} into {source_lang}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{languages[source_key]}: {source_sentence} \n{source_lang}: "
+    )
+
+
+def create_reverse_prompt_1(doc):
+    """Build the instruction-style prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    # English wins when present; otherwise the French side is used as the source.
+    source_key, source_lang = ("en", "English") if "en" in pair else ("fr", "French")
+    source_sentence = pair[source_key]
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{source_lang} into {languages[target_lang]}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{source_lang}: {source_sentence} \n{languages[target_lang]}: "
+    )
+
+
+def create_text_prompt_2(doc):
+    """Build the terse '<language> sentence:' prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    # Must be a plain str like the sibling builders; a trailing comma in parentheses would make a 1-tuple.
+    prompt = f"{languages[source_key]} sentence: {source_sentence} \n{source_lang} sentence: "
+    return prompt
+
+
+def create_reverse_prompt_2(doc):
+    """Build the terse '<language> sentence:' prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    source_key = "en" if "en" in pair else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = pair[source_key]
+    # Must be a plain str like the sibling builders; a trailing comma in parentheses would make a 1-tuple.
+    prompt = f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n"
+    return prompt
+
+
+def create_text_prompt_3(doc):
+    """Build the 'translation expert' prompt for translating into English/French."""
+    pair = doc["translation"]
+    source_key = [code for code in pair if code not in ("en", "fr")][0]
+    source_sentence = pair[source_key]
+    # Despite its name, source_lang labels the language being translated *into*.
+    source_lang = "English" if "en" in pair else "French"
+    return (
+        f"You are a translation expert. Translate the following {languages[source_key]} sentences "
+        f"to {source_lang}. \n{languages[source_key]} sentence: {source_sentence}\n{source_lang} sentence: "
+    )
+
+
+def create_reverse_prompt_3(doc):
+    """Build the 'translation expert' prompt for translating out of English/French."""
+    pair = doc["translation"]
+    # target_lang is a language code (a key of `languages`), not a display name.
+    target_lang = [code for code in pair if code not in ("en", "fr")][0]
+    source_key = "en" if "en" in pair else "fr"
+    source_lang = "French" if source_key == "fr" else "English"
+    source_sentence = pair[source_key]
+    return (
+        f"You are a translation expert. Translate the following {source_lang} sentence into {languages[target_lang]}\n"
+        f"{source_lang} sentence: {source_sentence}\n{languages[target_lang]} sentence: "
+    )
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..35548392e118f0625d9adae32849389d5239cd3e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_eng-afr
+- mafand_eng-afr_prompt_2
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target_reverse
+doc_to_text: !function utils.create_reverse_prompt_2
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-amh.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09c21d215e4cab74dda2b30ee110f55dcf2cbcbb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_en-amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-hau.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9a91c76499bf37863e6b55c71ff88205b1e2599
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_en-hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-ibo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..568a845e403663c3d24e30383cab8eebe0d37151
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_en-ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-kin.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09425f64afdb68817d0f462545e67f5a1e2d5f07
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_en-kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-lug.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13c91d36a6e5e0037a301c88fcf814309e29590f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_en-lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-luo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41bb09363d1c100ffea32d38eb2085400bec018b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-luo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_en-luo_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-nya.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90a728107edf37bddd1d4eb80bcc6ddfaa49572e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-nya.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_en-nya_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-pcm.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..73229c4f18ac24014cf15f161454910a921e1d02
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-pcm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_en-pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-sna.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac37187170a032226e1b33dd87fb09c7b9952cf1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_en-sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-swa.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21d9fc0e71589cb2e1c832a6b82e6d8fb5288b89
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_en-swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-tsn.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b3dd43626d00774a8e9bd6e432322e4d608887ba
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-tsn.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_en-tsn_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-twi.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5502ffa4e5547433d7cba66191456178c5d3377e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_en-twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-xho.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8c1ffee3415a1373a1b583819e874f968b5cfee
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_en-xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-yor.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89c070c7c5e9f6d53fde695f156408f37038242d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_en-yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-zul.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e54725404ba481e668156618a97f0b11d1a1fb31
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_en-zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_en-zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bam.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..15c6e981d1cb878e26a08755f5bf1fd7629f2525
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bam.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_fr-bam_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bbj.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f5101a752865bf641a150be2aff3b449239d3f8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-bbj.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_fr-bbj_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-ewe.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29d4214cf433e56e8f2d479299cf413e2d211d34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_fr-ewe_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-fon.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9710db5b341b91663a782945da57a3a21ae3c1ae
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-fon.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fr-fon_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-mos.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..682fb19c479f7f4a49a810c1f665c20439e4ee3e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-mos.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_fr-mos_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-wol.yaml b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3740ca9b73557e472f867b7b5c33131a441e18fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/mafand_fr-wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_fr-wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_2/english-african/utils.py
@@ -0,0 +1,121 @@
+languages = {  # three-letter language code -> language name interpolated into the prompts below
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Ghomala",  # was misspelled "Gbomala"; bbj is the Ghomala' language
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    """Return the English reference translation of ``doc`` if present, else the French one."""
+    translation = doc["translation"]
+    if "en" in translation:
+        return translation["en"]
+    # No English side in this pair, so fall back to the French side.
+    return translation["fr"]
+
+
+def get_target_reverse(doc):
+    """Return the sentence stored under the first ``translation`` key other than "en"/"fr"."""
+    translation = doc["translation"]
+    # Indexing [0] on the filtered keys raises IndexError when only "en"/"fr" keys exist.
+    other_keys = [code for code in translation if code not in ("en", "fr")]
+    return translation[other_keys[0]]
+
+
+def create_text_prompt_1(doc):
+    """Build the "advanced Translator" prompt for translating the African-language sentence into English/French."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    african_name = languages[african_code]
+    pivot_name = "English" if "en" in translation else "French"
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{african_name} into {pivot_name}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{african_name}: {translation[african_code]} \n{pivot_name}: "
+    )
+
+
+def create_reverse_prompt_1(doc):
+    """Build the "advanced Translator" prompt for translating the English/French sentence into the African language."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    pivot_code, pivot_name = ("en", "English") if "en" in translation else ("fr", "French")
+    pivot_sentence = translation[pivot_code]
+    african_name = languages[african_code]
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{pivot_name} into {african_name}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{pivot_name}: {pivot_sentence} \n{african_name}: "
+    )
+
+
+def create_text_prompt_2(doc):
+    """Return the bare "<African> sentence / <English or French> sentence" prompt as a ``str``."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    source_key = [key for key in translation if key not in ("en", "fr")][0]
+    source_sentence = translation[source_key]
+    source_lang = "English" if "en" in translation else "French"
+    # No trailing comma after the f-string: the previous one wrapped the prompt in a 1-tuple,
+    # so this function returned ("...",) instead of the prompt text itself.
+    return f"{languages[source_key]} sentence: {source_sentence} \n{source_lang} sentence: "
+
+
+def create_reverse_prompt_2(doc):
+    """Return the bare "<English or French> sentence / <African> sentence" prompt as a ``str``."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    target_lang = [key for key in translation if key not in ("en", "fr")][0]
+    source_key = "en" if "en" in translation else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = translation[source_key]
+    # No trailing comma after the f-string: the previous one wrapped the prompt in a 1-tuple,
+    # so this function returned ("...",) instead of the prompt text itself.
+    return f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n"
+
+
+def create_text_prompt_3(doc):
+    """Build the "translation expert" prompt for translating the African-language sentence into English/French."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    african_name = languages[african_code]
+    pivot_name = "English" if "en" in translation else "French"
+    return (
+        f"You are a translation expert. Translate the following {african_name} sentences "
+        f"to {pivot_name}. \n{african_name} sentence: {translation[african_code]}\n{pivot_name} sentence: "
+    )
+
+
+def create_reverse_prompt_3(doc):
+    """Build the "translation expert" prompt for translating the English/French sentence into the African language."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    pivot_code, pivot_name = ("en", "English") if "en" in translation else ("fr", "French")
+    pivot_sentence = translation[pivot_code]
+    african_name = languages[african_code]
+    return (
+        f"You are a translation expert. Translate the following {pivot_name} sentence into {african_name}\n"
+        f"{pivot_name} sentence: {pivot_sentence}\n{african_name} sentence: "
+    )
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..eb7ad9883115b42626fd30d223fa90ff6f133384
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_afr-eng
+- mafand_afr-eng_prompt_3
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target
+doc_to_text: !function utils.create_text_prompt_3  # prompt_3 template, matching this task's tag/directory (was create_text_prompt_2)
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_amh-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_amh-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..856318b5ad1b19fde25dd12ee3a2fc712b053b1a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_amh-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_amh-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bam-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bam-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bed4252375b82e34d27c96adf368217957e33063
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bam-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_bam-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bbj-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bbj-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1170c266f4b14550499e7ddf930c50714560ec66
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_bbj-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_bbj-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ewe-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ewe-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39a345cb6b6b86684e67978bd52c0e3071302a04
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ewe-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_ewe-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_fon-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_fon-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b464bb913f4888d825a2ab2aa2d566ef5d422d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_fon-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fon-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_hau-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_hau-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c0b0f15fe011d52ff4bbd167e23453a621e2928
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_hau-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_hau-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ibo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ibo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f78f55a3b27a4461efd85e741503ead06e4bcce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_ibo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_ibo-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_kin-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_kin-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..254b22be3883110224aee429313cf2636993e675
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_kin-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_kin-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_lug-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_lug-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad19b3c85e6aae28a01c7e3476f8492e804c6d83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_lug-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_lug-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_luo-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_luo-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3a367493d5da3804d2eda16eba676886765939e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_luo-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_luo-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_mos-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_mos-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ea419312925c06760ddc6f6efbf343594f2b932
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_mos-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_mos-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_nya-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_nya-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de9ec930a1b738673b91ed89916c17109b159e66
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_nya-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_nya-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_pcm-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_pcm-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95ad3380334ced0554dc21ab4e6a07f23352f51d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_pcm-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_pcm-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_sna-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_sna-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d86ccc3ad64220d354d4e9e230ee585e556b32c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_sna-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_sna-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_swa-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_swa-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c70f2e3e77f4ee94edd757311e8b37816911104
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_swa-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_swa-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_tsn-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_tsn-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ee8f4152a80d5af1bfd4b792167021d81e37284
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_tsn-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_tsn-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_twi-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_twi-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a37d2395a4721c6553a878f6693221cbac6a22a6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_twi-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_twi-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_wol-fr.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_wol-fr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed778cbe6690aafa7abd33fad50ef41fc25dbaea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_wol-fr.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_wol-fr_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_xho-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_xho-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93e9e2fee5b39b755b9e060697e509a5f63d58e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_xho-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_xho-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_yor-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_yor-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78301f7e658cd489eb6433f3ac5a10a0f0cde49b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_yor-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_yor-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_zul-en.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_zul-en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..06177d14ec829fe74a12030d88e06a5fee7bc9a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/mafand_zul-en.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_zul-en_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/african-english/utils.py
@@ -0,0 +1,121 @@
+languages = {  # three-letter language code -> language name interpolated into the prompts below
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Ghomala",  # was misspelled "Gbomala"; bbj is the Ghomala' language
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    """Return the English reference translation of ``doc`` if present, else the French one."""
+    translation = doc["translation"]
+    if "en" in translation:
+        return translation["en"]
+    # No English side in this pair, so fall back to the French side.
+    return translation["fr"]
+
+
+def get_target_reverse(doc):
+    """Return the sentence stored under the first ``translation`` key other than "en"/"fr"."""
+    translation = doc["translation"]
+    # Indexing [0] on the filtered keys raises IndexError when only "en"/"fr" keys exist.
+    other_keys = [code for code in translation if code not in ("en", "fr")]
+    return translation[other_keys[0]]
+
+
+def create_text_prompt_1(doc):
+    """Build the "advanced Translator" prompt for translating the African-language sentence into English/French."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    african_name = languages[african_code]
+    pivot_name = "English" if "en" in translation else "French"
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{african_name} into {pivot_name}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{african_name}: {translation[african_code]} \n{pivot_name}: "
+    )
+
+
+def create_reverse_prompt_1(doc):
+    """Build the "advanced Translator" prompt for translating the English/French sentence into the African language."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    pivot_code, pivot_name = ("en", "English") if "en" in translation else ("fr", "French")
+    pivot_sentence = translation[pivot_code]
+    african_name = languages[african_code]
+    return (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{pivot_name} into {african_name}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{pivot_name}: {pivot_sentence} \n{african_name}: "
+    )
+
+
+def create_text_prompt_2(doc):
+    """Return the bare "<African> sentence / <English or French> sentence" prompt as a ``str``."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    source_key = [key for key in translation if key not in ("en", "fr")][0]
+    source_sentence = translation[source_key]
+    source_lang = "English" if "en" in translation else "French"
+    # No trailing comma after the f-string: the previous one wrapped the prompt in a 1-tuple,
+    # so this function returned ("...",) instead of the prompt text itself.
+    return f"{languages[source_key]} sentence: {source_sentence} \n{source_lang} sentence: "
+
+
+def create_reverse_prompt_2(doc):
+    """Return the bare "<English or French> sentence / <African> sentence" prompt as a ``str``."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    target_lang = [key for key in translation if key not in ("en", "fr")][0]
+    source_key = "en" if "en" in translation else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = translation[source_key]
+    # No trailing comma after the f-string: the previous one wrapped the prompt in a 1-tuple,
+    # so this function returned ("...",) instead of the prompt text itself.
+    return f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n"
+
+
+def create_text_prompt_3(doc):
+    """Build the "translation expert" prompt for translating the African-language sentence into English/French."""
+    translation = doc["translation"]
+    # The source side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    african_name = languages[african_code]
+    pivot_name = "English" if "en" in translation else "French"
+    return (
+        f"You are a translation expert. Translate the following {african_name} sentences "
+        f"to {pivot_name}. \n{african_name} sentence: {translation[african_code]}\n{pivot_name} sentence: "
+    )
+
+
+def create_reverse_prompt_3(doc):
+    """Build the "translation expert" prompt for translating the English/French sentence into the African language."""
+    translation = doc["translation"]
+    # The target side is the first key that is not one of the codes "en"/"fr".
+    african_code = [code for code in translation if code not in ("en", "fr")][0]
+    pivot_code, pivot_name = ("en", "English") if "en" in translation else ("fr", "French")
+    pivot_sentence = translation[pivot_code]
+    african_name = languages[african_code]
+    return (
+        f"You are a translation expert. Translate the following {pivot_name} sentence into {african_name}\n"
+        f"{pivot_name} sentence: {pivot_sentence}\n{african_name} sentence: "
+    )
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand
new file mode 100644
index 0000000000000000000000000000000000000000..9a59654e4feba09f57277b539633c2b0efda291e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand
@@ -0,0 +1,28 @@
+tag:
+- mafand_tasks
+- mafand_eng-afr
+- mafand_eng-afr_prompt_3
+- afrobench_MT_tasks
+dataset_path: masakhane/mafand
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: validation
+fewshot_split: validation
+test_split: test
+doc_to_target: !function utils.get_target_reverse  # reference text: the non-"en"/"fr" side of doc["translation"]
+doc_to_text: !function utils.create_reverse_prompt_3  # NOTE(review): presumably the English/French -> African prompt_3 template; confirm in utils.py
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-amh.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10872430688882c53e5f2dba5aea70d1194c6020
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-amh.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-amh
+include: mafand
+task: mafand_en-amh_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-hau.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f64e68162a76b1a0cc33f44d62a0f66b5dc099e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-hau.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-hau
+include: mafand
+task: mafand_en-hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-ibo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72df05e551444a9e82de940171c646797f1c18d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-ibo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-ibo
+include: mafand
+task: mafand_en-ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-kin.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44c48678e89c3dd7e72d310fd050b5a1d3bc6092
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-kin.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-kin
+include: mafand
+task: mafand_en-kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-lug.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2beae91b569db6fc361da97e0879e854af005e4d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-lug.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-lug
+include: mafand
+task: mafand_en-lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-luo.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b4c1aa8becf693cb16d4d1be2820707e521a1052
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-luo.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-luo
+include: mafand
+task: mafand_en-luo_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-nya.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eee7af0ce8ed105fa08769793ad816c2f4d17318
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-nya.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-nya
+include: mafand
+task: mafand_en-nya_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-pcm.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6e60642562403f3b86d2e13fb3ea48368dc84883
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-pcm.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-pcm
+include: mafand
+task: mafand_en-pcm_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-sna.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82abd862535426ee078cd45f057c792643062981
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-sna.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-sna
+include: mafand
+task: mafand_en-sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-swa.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a7135ff6921556928338e52e3dbcc8afa731025
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-swa.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-swa
+include: mafand
+task: mafand_en-swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-tsn.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b976b5fd24007928854afe3b533ffd8b66ea0851
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-tsn.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-tsn
+include: mafand
+task: mafand_en-tsn_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-twi.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53345a2668eccb93b11568c22f6d218598f20ba2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-twi.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-twi
+include: mafand
+task: mafand_en-twi_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-xho.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4eba7f6994b577b4769b6b74a2848ecc0b3b0fb0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-xho.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-xho
+include: mafand
+task: mafand_en-xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-yor.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b20e9f920deb657630ef0aa5aa6a443934f0519
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-yor.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-yor
+include: mafand
+task: mafand_en-yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-zul.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cb5280b995b0754ae6a4f6cd33bfc82350d0e8cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_en-zul.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: en-zul
+include: mafand
+task: mafand_en-zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bam.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e94be00dea4c013238a543cc3ceeb2982b92ce4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bam.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bam
+include: mafand
+task: mafand_fr-bam_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bbj.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9170a6b500239357dacf736b06044dd17aff5b25
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-bbj.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-bbj
+include: mafand
+task: mafand_fr-bbj_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-ewe.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7139c81fd0c991bf9ee6a24013a34a8b0b700efc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-ewe.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-ewe
+include: mafand
+task: mafand_fr-ewe_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-fon.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b42292ce56e6abc7127e9988e5619e0eee3d56ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-fon.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-fon
+include: mafand
+task: mafand_fr-fon_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-mos.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..044047c346abcb945443b2b500eef7bd32f2caad
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-mos.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-mos
+include: mafand
+task: mafand_fr-mos_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-wol.yaml b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9fc1bca3b94f64489d26632c70c022558e7793b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/mafand_fr-wol.yaml
@@ -0,0 +1,4 @@
+# Generated by utils.py
+dataset_name: fr-wol
+include: mafand
+task: mafand_fr-wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/utils.py b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0df3a329824d44fa94eb830ae943fa30dd32bab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/mafand/prompt_3/english-african/utils.py
@@ -0,0 +1,121 @@
+languages = {
+    "amh": "Amharic",
+    "bam": "Bambara",
+    "bbj": "Gbomala",
+    "ewe": "Ewe",
+    "fon": "Fon",
+    "hau": "Hausa",
+    "ibo": "Igbo",
+    "kin": "Kinyarwanda",
+    "lug": "Luganda",
+    "luo": "Luo",
+    "mos": "Mossi",
+    "nya": "Chichewa",
+    "pcm": "Nigerian Pidgin",
+    "sna": "Shona",
+    "swa": "Swahili",
+    "tsn": "Setswana",
+    "twi": "Twi",
+    "wol": "Wolof",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "zul": "Zulu",
+}
+
+
+def get_target(doc):
+    target = (
+        doc["translation"]["en"]
+        if "en" in doc["translation"].keys()
+        else doc["translation"]["fr"]
+    )
+    return target
+
+
+def get_target_reverse(doc):
+    target_key = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    target = doc["translation"][target_key]
+    return target
+
+
+def create_text_prompt_1(doc):
+    source_key = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_sentence = doc["translation"][source_key]
+    source_lang = "English" if "en" in doc["translation"].keys() else "French"
+    prompt = (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{languages[source_key]} into {source_lang}. \nYour main goal is to ensure translations are grammatically "
+        f"correct and human-oriented. \n{languages[source_key]}: {source_sentence} \n{source_lang}: "
+    )
+    return prompt
+
+
+def create_reverse_prompt_1(doc):
+    target_lang = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_key = "en" if "en" in doc["translation"].keys() else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = doc["translation"][source_key]
+    prompt = (
+        "You are an advanced Translator, a specialized assistant designed to translate documents from "
+        f"{source_lang} into {languages[target_lang]}. \nYour main goal is to ensure translations are "
+        f"grammatically correct and human-oriented. \n{source_lang}: {source_sentence} \n{languages[target_lang]}: "
+    )
+    return prompt
+
+
+def create_text_prompt_2(doc):
+    source_key = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_sentence = doc["translation"][source_key]
+    source_lang = "English" if "en" in doc["translation"].keys() else "French"
+    prompt = (
+        f"{languages[source_key]} sentence: {source_sentence} \n{source_lang} sentence: ",
+    )
+    return prompt
+
+
+def create_reverse_prompt_2(doc):
+    target_lang = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_key = "en" if "en" in doc["translation"].keys() else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = doc["translation"][source_key]
+    prompt = (
+        f"{source_lang} sentence: {source_sentence} \n{languages[target_lang]} sentence: \n",
+    )
+    return prompt
+
+
+def create_text_prompt_3(doc):
+    source_key = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_sentence = doc["translation"][source_key]
+    source_lang = "English" if "en" in doc["translation"].keys() else "French"
+    prompt = (
+        f"You are a translation expert. Translate the following {languages[source_key]} sentences "
+        f"to {source_lang}. \n{languages[source_key]} sentence: {source_sentence}\n{source_lang} sentence: "
+    )
+    return prompt
+
+
+def create_reverse_prompt_3(doc):
+    target_lang = [key for key in doc["translation"].keys() if key not in ["en", "fr"]][
+        0
+    ]
+    source_key = "en" if "en" in doc["translation"].keys() else "fr"
+    source_lang = "English" if source_key == "en" else "French"
+    source_sentence = doc["translation"][source_key]
+    prompt = (
+        f"You are a translation expert. Translate the following {source_lang} sentence into {languages[target_lang]}\n"
+        f"{source_lang} sentence: {source_sentence}\n{languages[target_lang]} sentence: "
+    )
+    return prompt
diff --git a/lm_eval/tasks/afrobench/masakhaner/README.md b/lm_eval/tasks/afrobench/masakhaner/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ca96648e07bb2c54fbf0c79d968b2ef4cb6aba75
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/README.md
@@ -0,0 +1,76 @@
+# MasakhaNER
+
+## Paper
+Title: `MasakhaNER 2.0: Africa-centric Transfer Learning for Named Entity Recognition`
+
+Paper Link: https://aclanthology.org/2022.emnlp-main.298/
+
+## Abstract
+>African languages are spoken by over a billion people, but they are under-represented in NLP research and development. Multiple challenges exist, including the limited availability of annotated training and evaluation datasets as well as the lack of understanding of which settings, languages, and recently proposed methods like cross-lingual transfer will be effective. In this paper, we aim to move towards solutions for these challenges, focusing on the task of named entity recognition (NER). We present the creation of the largest to-date human-annotated NER dataset for 20 African languages. We study the behaviour of state-of-the-art cross-lingual transfer methods in an Africa-centric setting, empirically demonstrating that the choice of source transfer language significantly affects performance. While much previous work defaults to using English as the source language, our results show that choosing the best transfer language improves zero-shot F1 scores by an average of 14% over 20 languages as compared to using English.
+
+HomePage: https://github.com/masakhane-io/masakhane-ner
+
+### Citation
+
+```
+@inproceedings{adelani-etal-2022-masakhaner,
+    title = "{M}asakha{NER} 2.0: {A}frica-centric Transfer Learning for Named Entity Recognition",
+    author = "Adelani, David Ifeoluwa  and
+      Neubig, Graham  and
+      Ruder, Sebastian  and
+      Rijhwani, Shruti  and
+      Beukman, Michael  and
+      Palen-Michel, Chester  and
+      Lignos, Constantine  and
+      Alabi, Jesujoba O.  and
+      Muhammad, Shamsuddeen H.  and
+      Nabende, Peter  and
+      Dione, Cheikh M. Bamba  and
+      Bukula, Andiswa  and
+      Mabuya, Rooweither  and
+      Dossou, Bonaventure F. P.  and
+      Sibanda, Blessing  and
+      Buzaaba, Happy  and
+      Mukiibi, Jonathan  and
+      Kalipe, Godson  and
+      Mbaye, Derguene  and
+      Taylor, Amelia  and
+      Kabore, Fatoumata  and
+      Emezue, Chris Chinenye  and
+      Aremu, Anuoluwapo  and
+      Ogayo, Perez  and
+      Gitau, Catherine  and
+      Munkoh-Buabeng, Edwin  and
+      Memdjokam Koagne, Victoire  and
+      Tapo, Allahsera Auguste  and
+      Macucwa, Tebogo  and
+      Marivate, Vukosi  and
+      Mboning, Elvis  and
+      Gwadabe, Tajuddeen  and
+      Adewumi, Tosin  and
+      Ahia, Orevaoghene  and
+      Nakatumba-Nabende, Joyce  and
+      Mokono, Neo L.  and
+      Ezeani, Ignatius  and
+      Chukwuneke, Chiamaka  and
+      Adeyemi, Mofetoluwa  and
+      Hacheme, Gilles Q.  and
+      Abdulmumim, Idris  and
+      Ogundepo, Odunayo  and
+      Yousuf, Oreen  and
+      Moteu Ngoli, Tatiana  and
+      Klakow, Dietrich",
+    editor = "Goldberg, Yoav  and
+      Kozareva, Zornitsa  and
+      Zhang, Yue",
+    booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
+    month = dec,
+    year = "2022",
+    address = "Abu Dhabi, United Arab Emirates",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2022.emnlp-main.298/",
+    doi = "10.18653/v1/2022.emnlp-main.298",
+    pages = "4488--4508",
+    abstract = "African languages are spoken by over a billion people, but they are under-represented in NLP research and development. Multiple challenges exist, including the limited availability of annotated training and evaluation datasets as well as the lack of understanding of which settings, languages, and recently proposed methods like cross-lingual transfer will be effective. In this paper, we aim to move towards solutions for these challenges, focusing on the task of named entity recognition (NER). We present the creation of the largest to-date human-annotated NER dataset for 20 African languages. We study the behaviour of state-of-the-art cross-lingual transfer methods in an Africa-centric setting, empirically demonstrating that the choice of source transfer language significantly affects performance. While much previous work defaults to using English as the source language, our results show that choosing the best transfer language improves zero-shot F1 scores by an average of 14{\%} over 20 languages as compared to using English."
+}
+```
diff --git a/lm_eval/tasks/afrobench/masakhaner/gen_utils.py b/lm_eval/tasks/afrobench/masakhaner/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d1012021f567ab02ccdd6259788e00ea1f759e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/gen_utils.py
@@ -0,0 +1,138 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # plain holder for ``value``; nothing else in this script references it
+    def __init__(self, value):
+        self.value = value
+
+
+def prompt_func(mode, lang):
+    prompt_map = {
+        "prompt_1": "Named entities refers to names of location, organisation and personal name. \n For example, "
+        "'David is an employee of Amazon and he is visiting New York next week to see Esther' will be \n"
+        "PERSON: David $ ORGANIZATION: Amazon $ LOCATION: New York $ PERSON: Esther \n\n"
+        "Ensure the output strictly follows the format: label: entity $ label: entity, with each unique "
+        "entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity) or "
+        "irrelevant entries like none. \n\nText: {{text}} \n"
+        "Return only the output",
+        "prompt_2": "You are working as a named entity recognition expert and your task is to label a given text "
+        "with named entity labels. Your task is to identify and label any named entities present in the "
+        "text. The named entity labels that you will be using are PER (person), LOC (location), "
+        "ORG (organization) and DATE (date). Label multi-word entities as a single named entity. "
+        "For words which are not part of any named entity, do not return any value for it. \n"
+        "Ensure the output strictly follows the format: label: entity $$ label: entity, with each unique "
+        "entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity) or "
+        "irrelevant entries like none. Return only the output \n\nText: {{text}}",
+        "prompt_3": f"You are a Named Entity Recognition expert in {lang} language. \nExtract all named entities from "
+        f"the following {lang} text and categorize them into PERSON, LOCATION, ORGANIZATION, or DATE. "
+        f"Ensure the output strictly follows the format: label: entity $$ label: entity, with each unique "
+        "entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity) or "
+        "irrelevant entries like none. Return only the output \n\nText: {{text}}",
+        "prompt_4": f"As a {lang} linguist, label all named entities in the {lang} text below with the categories: "
+        "PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output strictly follows the format: label: "
+        "entity $$ label: entity, with each unique entity on a separate label line, avoiding grouped "
+        "entities (e.g., avoid LOC: entity, entity) or irrelevant entries like none. Return only the "
+        "output. \n\nText: {{text}}",
+        "prompt_5": "Provide a concise list of named entities in the text below. Use the following labels: "
+        "PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output strictly follows the format: label: "
+        "entity $$ label: entity, with each unique entity on a separate label line, avoiding grouped "
+        "entities (e.g., avoid LOC: entity, entity) or irrelevant entries like none. Return only the "
+        "output.  \n\nText: {{text}}",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    """
+    err = []
+    languages = {
+        "am": "Amharic",
+        "bm": "Bambara",
+        "bbj": "Ghomala",
+        "ee": "Ewe",
+        "ha": "Hausa",
+        "ig": "Igbo",
+        "rw": "Kinyarwanda",
+        "lg": "Luganda",
+        "luo": "Luo",
+        "mos": "Mossi",
+        "ny": "Chichewa",
+        "pcm": "Nigerian Pidgin",
+        "sn": "chiShona",
+        "sw": "Kiswahili",
+        "tn": "Setswana",
+        "tw": "Twi",
+        "wo": "Wolof",
+        "xh": "isiXhosa",
+        "yo": "Yoruba",
+        "zu": "isiZulu",
+    }
+
+    for lang in languages.keys():
+        try:
+            file_name = f"masakhaner_{lang}.yaml"
+            task_name = f"masakhaner_{lang}_{mode}"
+            yaml_template = "masakhaner"
+            yaml_details = {
+                "include": yaml_template,
+                "task": task_name,
+                "dataset_name": lang,
+                "doc_to_text": prompt_func(mode, languages[lang]),
+            }
+            os.makedirs(f"{output_dir}/{mode}", exist_ok=True)
+            with open(
+                f"{output_dir}/{mode}/{file_name}",
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    yaml_details,
+                    f,
+                    allow_unicode=True,
+                )
+        except FileExistsError:
+            err.append(file_name)
+
+    if len(err) > 0:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/masakhaner/masakhaner.yaml b/lm_eval/tasks/afrobench/masakhaner/masakhaner.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0d374e80c43cc8831d167887e386f3500773b48
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/masakhaner.yaml
@@ -0,0 +1,13 @@
+group: masakhaner
+task:
+  - masakhaner_prompt_1
+  - masakhaner_prompt_2
+  - masakhaner_prompt_3
+  - masakhaner_prompt_4
+  - masakhaner_prompt_5
+aggregate_metric_list:
+  - metric: acc  # NOTE(review): the prompt_1 task template reports f1, not acc - confirm this name
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner
new file mode 100644
index 0000000000000000000000000000000000000000..706eb36644524b2aaa10b686ce120048e6322390
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner
@@ -0,0 +1,26 @@
+tag:
+- masakhaner_tasks
+- masakhaner_prompt_1
+dataset_path: masakhane/masakhaner-x
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: target
+filter_list:
+ - name: flexible-extract
+   filter:
+     - function: format_span
+metric_list:
+  - metric: f1
+    aggregation: !function utils.span_f1_agg
+    higher_is_better: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_am.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2128752f754eb8c46f7608c89dbefd7a3800480
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_am.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: am
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_am_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bbj.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f3a72bdc0257aedee8787af75a70c14560bfc53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bbj.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_bbj_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c38bdee947c2d34a4a7797eae1251fc476be0f53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_bm.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: bm
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_bm_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ee.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97903908e30cfe376626f7c668d5d9862593d73e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ee.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: ee
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_ee_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ha.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad11710407bda79b9561cd05725c21eb945ca292
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ha.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_ha_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ig.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ig.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f06c0655595ae81b985266e4e629b080d49130c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ig.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_ig_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_lg.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_lg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1823b20f63e1e4d2bcf5edc1427be758c3d16a62
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_lg.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: lg
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_lg_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_luo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55b6d82968ed9a09ef43a12bd5cba91ea4ab5c87
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_luo.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_luo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_mos.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac5ddf43cd0eefd5fbe85785c7b4687135924938
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_mos.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_mos_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ny.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ny.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36d12ad2c00c0101aa2405af5824b6cbf310d132
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_ny.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: ny
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_ny_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_pcm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c09bf44c682758e53deb9262e4aef393d8bbc8e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_pcm.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_rw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_rw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7398e5fbe7b55f837b900158b7fdc4a3b7e2ac92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_rw.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: rw
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_rw_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ecdd3260fcb8b0fd147dfba666aa0a42e4687323
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sn.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: sn
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_sn_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2bd3379c3b3508067ae500c5cad947ba7006b74
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_sw.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: sw
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_sw_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50d80dcb79ab8f2bb0f423d42159dd1d56fd262f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tn.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: tn
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_tn_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c8a8d40575c4867cc0ec33a96343f1eb9c29f7b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_tw.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: tw
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_tw_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_wo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_wo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e5f6eeaecb9ad56ee2cf035cf1390f489d8ba98
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_wo.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: wo
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_wo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_xh.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_xh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b27051f5df77d39561c0fcee81033a357a9220d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_xh.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: xh
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_xh_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_yo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2fdb71aa53d4eacdb5cfcdada220423909f9515d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_yo.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_yo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_zu.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83b9d4b0fd5ef3ab2d9f62639d49a9cea1e3a1a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/masakhaner_zu.yaml
@@ -0,0 +1,11 @@
+# Generated by utils.py
+dataset_name: zu
+doc_to_text: "Named entities refers to names of location, organisation and personal\
+  \ name. \n For example, 'David is an employee of Amazon and he is visiting New York\
+  \ next week to see Esther' will be \nPERSON: David $ ORGANIZATION: Amazon $ LOCATION:\
+  \ New York $ PERSON: Esther \n\nEnsure the output strictly follows the format: label:\
+  \ entity $ label: entity, with each unique entity on a separate label line, avoiding\
+  \ grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries like\
+  \ none. \n\nText: {{text}} \nReturn only the output"
+include: masakhaner
+task: masakhaner_zu_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_1/utils.py b/lm_eval/tasks/afrobench/masakhaner/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..76909044e7f35948156f8bb506ce2fce563ec689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_1/utils.py
@@ -0,0 +1,146 @@
+import collections
+import re
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc) -> str:  # gold answer built from the doc's "ner_tags" field
+    return transform_text(doc["ner_tags"])
+
+
+def transform_text(text):
+    """Convert newline-separated ``word: TAG`` BIO lines to ``TAG: entity $$ ...``.
+
+    Words are lower-cased and stripped of surrounding ","/"."; the words of a
+    multi-word entity are joined with single spaces (e.g. ``LOC: new york``).
+    """
+    entities = []
+    words, label = [], ""
+
+    def flush():
+        # Emit the entity collected so far (if it has any text) and reset state.
+        nonlocal words, label
+        if any(words):
+            entities.append(f"{label}: {' '.join(w for w in words if w)}")
+        words, label = [], ""
+
+    # Whitespace-only lines are dropped along with empty ones.
+    for line in filter(None, map(str.strip, text.split("\n"))):
+        word, tag = line.rsplit(": ", 1)  # the tag follows the last ": "
+        word, tag = word.lower().strip(",.").strip(), tag.upper()
+        if tag.startswith("B-"):
+            flush()
+            words, label = [word], tag.split("-")[1]
+        elif tag.startswith("I-") and tag.split("-")[1] == label:
+            words.append(word)
+        else:
+            flush()
+    flush()
+    return " $$ ".join(entities)
+
+
+def span_f1_agg(items):
+    """Compute a single span-level F1 score over (target, prediction) pairs.
+
+    This function is copied from
+    https://github.com/google-research/multilingual-t5/blob/master/multilingual_t5/evaluation/metrics.py
+
+    Spans are compared as normalized ``(label, entity)`` tuples. True positives,
+    false positives and false negatives are pooled over all labels and examples
+    before precision, recall and F1 are computed.
+
+    Args:
+    items: iterable of ``(target, prediction)`` pairs. Each side is a string of
+      ``label: entity`` spans separated by ``$$`` or ``$``, or a list of such
+      strings (joined with spaces before parsing).
+
+    Returns:
+    span f1 across all targets and predictions (Based on CoNLL script), or 0.0
+    when ``items`` is empty.
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:  # no examples: unzipped_list[0] would raise IndexError
+        return 0.0
+    targets, predictions = unzipped_list[0], unzipped_list[1]
+
+    true_positives = collections.defaultdict(int)
+    false_positives = collections.defaultdict(int)
+    false_negatives = collections.defaultdict(int)
+
+    # Compile every pattern once per call instead of once per normalized string.
+    # NOTE: inside the character class the trailing `"-,` forms a range, so "#"
+    # is stripped as well even though it is not listed explicitly.
+    my_punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~•@.""-,`'
+    punctuation_re = re.compile("[" + my_punctuation + "]+")
+    articles_re = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    wide_gap_re = re.compile(r"\s{3,}|\t")
+    # Raw string: a plain "\s+" literal is an invalid escape sequence in Python.
+    whitespace_re = re.compile(r"\s+")
+
+    def normalize_text(strings):
+        """Normalize a label or entity string before spans are compared.
+
+        Order matters: articles are removed before lower-casing, so only
+        already-lowercase "a"/"an"/"the" are dropped.
+        """
+        strings = punctuation_re.sub(" ", str(strings))  # punctuation runs -> " "
+        strings = articles_re.sub(" ", strings)
+        strings = wide_gap_re.sub("", strings)  # delete tabs, 3+ whitespace runs
+        strings = whitespace_re.sub(" ", strings)  # collapse what is left
+        return strings.lower()
+
+    def tags_to_spans(tag_sequence, delimiter="$$"):
+        """Parse ``label: entity`` text into normalized ``(label, entity)`` tuples."""
+        if isinstance(tag_sequence, list):
+            tag_sequence = " ".join(i.strip() for i in tag_sequence)
+        # Split on the delimiter first, then on any remaining single "$".
+        tag_sequence_split = [
+            item.strip()
+            for sub in tag_sequence.strip().split(delimiter)
+            for item in sub.split("$")
+            if item
+        ]
+        # Also break apart pieces that are separated by ". " or ", ".
+        tag_sequence_split = [
+            item.strip()
+            for value in tag_sequence_split
+            for sub in value.split(". ")
+            for item in sub.split(", ")
+        ]
+        tags_entities = []
+        for tag_entity in tag_sequence_split:
+            tag_entity_split = tag_entity.split(": ")
+            if len(tag_entity_split) != 2:
+                continue  # not exactly one "label: entity" pair; ignore it
+            tag = normalize_text(tag_entity_split[0].strip())
+            entity = normalize_text(tag_entity_split[1].strip())
+            tags_entities.append((tag, entity))
+        return tags_entities
+
+    def compute_f1_metrics(true_positive, false_positive, false_negative):
+        """Return (precision, recall, f1); 1e-13 keeps the divisions defined."""
+        precision = float(true_positive) / float(true_positive + false_positive + 1e-13)
+        recall = float(true_positive) / float(true_positive + false_negative + 1e-13)
+        f1_measures = 2.0 * ((precision * recall) / (precision + recall + 1e-13))
+        return precision, recall, f1_measures
+
+    for target, pred in zip(targets, predictions):
+        gold_spans = tags_to_spans(target)
+        predicted_spans = tags_to_spans(pred)
+
+        for span in predicted_spans:
+            if span in gold_spans:
+                true_positives[span[0]] += 1
+                gold_spans.remove(span)  # each gold span can be matched only once
+            else:
+                false_positives[span[0]] += 1
+        # These spans weren't predicted.
+        for span in gold_spans:
+            false_negatives[span[0]] += 1
+
+    _, _, f1_measure = compute_f1_metrics(
+        sum(true_positives.values()),
+        sum(false_positives.values()),
+        sum(false_negatives.values()),
+    )
+    return f1_measure
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner
new file mode 100644
index 0000000000000000000000000000000000000000..2fd5eb829ce60a5a16d970dbf6b0078e42135cf4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner
@@ -0,0 +1,26 @@
+tag:
+- masakhaner_tasks
+- masakhaner_prompt_2
+dataset_path: masakhane/masakhaner-x
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: target
+filter_list:
+ - name: flexible-extract
+   filter:
+     - function: format_span
+metric_list:
+  - metric: f1
+    aggregation: !function utils.span_f1_agg
+    higher_is_better: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_am.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd1bd33551e152f6f29ff9d248d76ec236be75d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_am.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: am
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_am_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bbj.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d817ecbe3596b40fad80fe4b95af7ace7bd9e35
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bbj.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bbj_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f99a03c7486f8f7170d19b1935e223210915c137
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_bm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bm
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bm_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ee.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da31685e7d9fb0fa462403e0347347600c705d41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ee.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ee
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ee_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ha.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8075046a92f4c88ce8a75db0416732b4f0e96f45
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ha.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ha_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ig.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ig.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8771f510a5de2a7ff56acd593b4de3b2309dd07
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ig.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ig_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_lg.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_lg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c6729e368b3cf53900035c5730d9b729a9e2baa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_lg.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lg
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_lg_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_luo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a458235f101ae430c77f9da1124a0b8d9fdcda38
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_luo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_luo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_mos.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..816b9bdedc4578c6d2ade8cb05258a4ebc7280de
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_mos.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_mos_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ny.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ny.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f8c4c13c89495b4da3b07ad432b3969310037f8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_ny.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ny
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ny_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_pcm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75dc6ec048cab2dc550d8174dbcc44d966a6ff8e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_pcm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_rw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_rw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb93e2d4b24c68bc46b0f141d4c43a51b39ea41e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_rw.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: rw
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_rw_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60380a512424253dc84f826922fc1fd2f9e75d72
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sn.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sn
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sn_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82cf74ae26a0cec7d9d659713c7d0203ab829a1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_sw.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sw
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sw_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1852ebe9ae79b51e586fed461b88e7b03fc8557c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tn.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tn
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tn_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ea354958bcf1590a41fc4e48426d403cae4f9454
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_tw.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tw
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tw_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_wo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_wo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7cd0d754be2d6fc09054f50d18d29fa07a8551a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_wo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wo
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_wo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_xh.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_xh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9451f0edd121337f8d4dd316284c05a8ea73ff6b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_xh.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xh
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_xh_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_yo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc0d92c50ed1cf7182b55ed192b2b440d58317e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_yo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_yo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_zu.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e06bf3cef883c5ae3f9bf12d7abf5c27618d37e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/masakhaner_zu.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zu
+doc_to_text: "You are working as a named entity recognition expert and your task is\
+  \ to label a given text with named entity labels. Your task is to identify and label\
+  \ any named entities present in the text. The named entity labels that you will\
+  \ be using are PER (person), LOC (location), ORG (organization) and DATE (date).\
+  \ Label multi-word entities as a single named entity. For words which are not part\
+  \ of any named entity, do not return any value for it. \nEnsure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_zu_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_2/utils.py b/lm_eval/tasks/afrobench/masakhaner/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..76909044e7f35948156f8bb506ce2fce563ec689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_2/utils.py
@@ -0,0 +1,146 @@
+import collections
+import re
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Build the reference string for ``doc`` from its ``ner_tags`` field.
+
+    The raw ``doc["ner_tags"]`` value is handed to ``transform_text``, which
+    turns it into the ``LABEL: entity $$ LABEL: entity`` target format.
+    """
+    raw_tags = doc["ner_tags"]
+    return transform_text(raw_tags)
+
+
+def transform_text(text):
+    """Collapse token-level BIO annotations into a ``LABEL: entity`` string.
+
+    Args:
+        text: Newline-separated ``word: TAG`` pairs, where ``TAG`` is a BIO
+            tag such as ``B-PER``, ``I-PER`` or ``O``. Blank and
+            whitespace-only lines are skipped.
+
+    Returns:
+        The collected entities, each formatted as ``LABEL: entity`` and joined
+        with ``" $$ "``; an empty string when no entity is found. Words are
+        lower-cased and stripped of surrounding commas/periods, and the words
+        of a multi-word entity are separated by a single space.
+
+    Raises:
+        ValueError: If a non-blank line does not split into exactly one word
+            and one tag on the ``": "`` separator.
+    """
+    entities = []
+    current_entity = ""
+    current_tag = ""
+
+    for raw_line in text.split("\n"):
+        line = raw_line.strip()
+        if not line:
+            # Whitespace-only lines carry no annotation; skipping them avoids
+            # an unpacking error on the split below.
+            continue
+
+        word, tag = line.split(": ")
+        tag = tag.upper()
+        word = word.lower().strip(",.").strip()
+
+        if tag.startswith("B-"):
+            # A new entity starts: emit the one collected so far, if any.
+            if current_entity:
+                entities.append(f"{current_tag}: {current_entity}")
+            current_tag = tag.split("-")[1]
+            current_entity = word
+        elif tag.startswith("I-") and tag.split("-")[1] == current_tag:
+            # Continuation of the open entity. Keep the words space-separated
+            # so "john" + "smith" becomes "john smith", not "johnsmith".
+            current_entity = f"{current_entity} {word}".strip()
+        else:
+            # "O" tag, or an I- tag that does not continue the open entity:
+            # close whatever was being collected and drop this word.
+            if current_entity:
+                entities.append(f"{current_tag}: {current_entity}")
+                current_entity = ""
+                current_tag = ""
+
+    # Emit the entity that was still open when the input ended.
+    if current_entity:
+        entities.append(f"{current_tag}: {current_entity}")
+
+    # Join all the transformed entities with $$ as separator.
+    return " $$ ".join(entities)
+
+
+def span_f1_agg(items):
+    """Compute a micro-averaged span-level F1 over (target, prediction) pairs.
+
+    Adapted from the span F1 metric in
+    https://github.com/google-research/multilingual-t5/blob/master/multilingual_t5/evaluation/metrics.py
+
+    Args:
+        items: Iterable of ``(target, prediction)`` pairs. Each member is a
+            string in the ``label: entity $$ label: entity`` format, or a list
+            of such strings (its elements are joined with spaces first).
+
+    Returns:
+        The F1 score as a float, computed from the true positives, false
+        positives and false negatives pooled over all pairs and labels.
+        Returns ``0.0`` when ``items`` is empty.
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:
+        # No examples at all: nothing was predicted and nothing was missed.
+        return 0.0
+    targets = unzipped_list[0]
+    predictions = unzipped_list[1]
+
+    # Patterns are compiled once here instead of on every normalisation call.
+    my_punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~•@.""-,`'
+    punctuation_re = re.compile("[" + my_punctuation + "]+")
+    articles_re = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    wide_gap_re = re.compile(r"\s{3,}|\t")
+    whitespace_re = re.compile(r"\s+")
+
+    def normalize_text(text):
+        """Replace punctuation and articles with spaces, tidy gaps, lower-case."""
+        text = punctuation_re.sub(" ", str(text))
+        # Articles are removed before lower-casing, so only lower-case
+        # "a"/"an"/"the" are dropped; this ordering is kept as it was.
+        text = articles_re.sub(" ", text)
+        # Runs of three or more whitespace characters (and tabs) are deleted
+        # outright; any remaining whitespace run collapses to one space.
+        text = wide_gap_re.sub("", text)
+        text = whitespace_re.sub(" ", text)
+        return text.lower()
+
+    def tags_to_spans(tag_sequence, delimiter="$$"):
+        """Parse a ``label: entity`` sequence into ``(label, entity)`` tuples."""
+        if isinstance(tag_sequence, list):
+            tag_sequence = " ".join(i.strip() for i in tag_sequence)
+        # Split on the "$$" delimiter, tolerating a stray single "$" as well.
+        chunks = [
+            item.strip()
+            for sub in tag_sequence.strip().split(delimiter)
+            for item in sub.split("$")
+            if item
+        ]
+        # Also break up entries that were grouped with ". " or ", ".
+        chunks = [
+            item.strip()
+            for value in chunks
+            for sub in value.split(". ")
+            for item in sub.split(", ")
+        ]
+        tags_entities = []
+        for tag_entity in chunks:
+            tag_entity_split = tag_entity.split(": ")
+            if len(tag_entity_split) != 2:
+                # Not a well-formed "label: entity" entry; ignore it.
+                continue
+            tag = normalize_text(tag_entity_split[0].strip())
+            entity = normalize_text(tag_entity_split[1].strip())
+            tags_entities.append((tag, entity))
+        return tags_entities
+
+    def compute_f1_metrics(true_positive, false_positive, false_negative):
+        # The 1e-13 terms keep the divisions defined when a count is zero.
+        precision = float(true_positive) / float(true_positive + false_positive + 1e-13)
+        recall = float(true_positive) / float(true_positive + false_negative + 1e-13)
+        f1_measures = 2.0 * ((precision * recall) / (precision + recall + 1e-13))
+        return precision, recall, f1_measures
+
+    # Only the pooled totals are needed, so plain counters are enough.
+    true_positives = 0
+    false_positives = 0
+    false_negatives = 0
+
+    for target, pred in zip(targets, predictions):
+        gold_spans = tags_to_spans(target)
+        predicted_spans = tags_to_spans(pred)
+
+        for span in predicted_spans:
+            if span in gold_spans:
+                true_positives += 1
+                # Each gold span can be matched by one prediction only.
+                gold_spans.remove(span)
+            else:
+                false_positives += 1
+        # These spans weren't predicted.
+        false_negatives += len(gold_spans)
+
+    _, _, f1_measure = compute_f1_metrics(
+        true_positives, false_positives, false_negatives
+    )
+    return f1_measure
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner
new file mode 100644
index 0000000000000000000000000000000000000000..7f32f86b1e194826a7ffe7d4edb0935eac80c491
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner
@@ -0,0 +1,26 @@
+# Shared base config for the MasakhaNER "prompt_3" tasks. The per-language YAML
+# files in this directory load it with `include: masakhaner` and supply their
+# own dataset_name, doc_to_text and task values.
+tag:
+- masakhaner_tasks
+- masakhaner_prompt_3
+dataset_path: masakhane/masakhaner-x
+# Left null here; each per-language file sets its own dataset_name (e.g. am, ha, yo).
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: target
+filter_list:
+ - name: flexible-extract
+   filter:
+     - function: format_span
+metric_list:
+  - metric: f1
+    # Scores are aggregated by span_f1_agg from this directory's utils.py.
+    aggregation: !function utils.span_f1_agg
+    higher_is_better: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_am.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54ad8b54111743ecf392d944c6201bfc56e5362c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_am.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: am
+doc_to_text: "You are a Named Entity Recognition expert in Amharic language. \nExtract\
+  \ all named entities from the following Amharic text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_am_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bbj.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23e724f424a862308d1947b176cc24b5eb040d47
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bbj.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "You are a Named Entity Recognition expert in Ghomala language. \nExtract\
+  \ all named entities from the following Ghomala text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bbj_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62b5b80e7c26335f304f7ed9673dd9f1c94ef970
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_bm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bm
+doc_to_text: "You are a Named Entity Recognition expert in Bambara language. \nExtract\
+  \ all named entities from the following Bambara text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bm_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ee.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cdadd27559ab964cacfd396be6fb29a3ae392e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ee.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ee
+doc_to_text: "You are a Named Entity Recognition expert in Ewe language. \nExtract\
+  \ all named entities from the following Ewe text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ee_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ha.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d19d26f67447f8916f74b5e26d1bccc0e65bc57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ha.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "You are a Named Entity Recognition expert in Hausa language. \nExtract\
+  \ all named entities from the following Hausa text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ha_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ig.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ig.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..edf6119689b51760de8a77b70c7dd4461f41b99d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ig.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "You are a Named Entity Recognition expert in Igbo language. \nExtract\
+  \ all named entities from the following Igbo text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ig_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_lg.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_lg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9318a78207c0f3f9e6d2de954e6707100143258e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_lg.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lg
+doc_to_text: "You are a Named Entity Recognition expert in Luganda language. \nExtract\
+  \ all named entities from the following Luganda text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_lg_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_luo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..61254fc358a2bd8f2ed2a5dd6aca3d09f88ae232
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_luo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "You are a Named Entity Recognition expert in Luo language. \nExtract\
+  \ all named entities from the following Luo text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_luo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_mos.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84ff6b24aaffaf14564de139bde92b1c850bcddd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_mos.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "You are a Named Entity Recognition expert in Mossi language. \nExtract\
+  \ all named entities from the following Mossi text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_mos_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ny.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ny.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd592c5b93e39a488d5c5d909137e1caada54840
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_ny.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ny
+doc_to_text: "You are a Named Entity Recognition expert in Chichewa language. \nExtract\
+  \ all named entities from the following Chichewa text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ny_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_pcm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b448b244b8d6580c0ebc53817060de633dd39efb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_pcm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are a Named Entity Recognition expert in Nigerian Pidgin language.\
+  \ \nExtract all named entities from the following Nigerian Pidgin text and categorize\
+  \ them into PERSON, LOCATION, ORGANIZATION, or DATE. Ensure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_pcm_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_rw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_rw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5356ce8b011c53b77ab18302512e30b52c727e37
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_rw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: rw
+doc_to_text: "You are a Named Entity Recognition expert in Kinyarwanda language. \n\
+  Extract all named entities from the following Kinyarwanda text and categorize them\
+  \ into PERSON, LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows\
+  \ the format: label: entity $$ label: entity, with each unique entity on a separate\
+  \ label line, avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant\
+  \ entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_rw_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab356ae80061b8c49348513b8f6fa05b2dea9473
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sn.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sn
+doc_to_text: "You are a Named Entity Recognition expert in chiShona language. \nExtract\
+  \ all named entities from the following chiShona text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sn_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb3d69595796755c03f4f02f201d36e121e4b6bd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_sw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sw
+doc_to_text: "You are a Named Entity Recognition expert in Kiswahili language. \n\
+  Extract all named entities from the following Kiswahili text and categorize them\
+  \ into PERSON, LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows\
+  \ the format: label: entity $$ label: entity, with each unique entity on a separate\
+  \ label line, avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant\
+  \ entries like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sw_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d42d164ad81853e92855fd51e73bd0e276d4c761
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tn.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tn
+doc_to_text: "You are a Named Entity Recognition expert in Setswana language. \nExtract\
+  \ all named entities from the following Setswana text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tn_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62b4e2af7c4916e851c834a27c8357a91140d45b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_tw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tw
+doc_to_text: "You are a Named Entity Recognition expert in Twi language. \nExtract\
+  \ all named entities from the following Twi text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tw_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_wo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_wo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6db45e2bccb590f2586494d3fb586b3e5966a17b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_wo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: wo
+doc_to_text: "You are a Named Entity Recognition expert in Wolof language. \nExtract\
+  \ all named entities from the following Wolof text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_wo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_xh.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_xh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a697b274e71c54293edab7d6dbefabd719843ea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_xh.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: xh
+doc_to_text: "You are a Named Entity Recognition expert in isiXhosa language. \nExtract\
+  \ all named entities from the following isiXhosa text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_xh_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_yo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..589cd5b35a6b27c8a9075ee22b9f8fd98a11860d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_yo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "You are a Named Entity Recognition expert in Yoruba language. \nExtract\
+  \ all named entities from the following Yoruba text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_yo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_zu.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c25d5a0c89da79ea982a35f8d032c3c489a16a80
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/masakhaner_zu.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: zu
+doc_to_text: "You are a Named Entity Recognition expert in isiZulu language. \nExtract\
+  \ all named entities from the following isiZulu text and categorize them into PERSON,\
+  \ LOCATION, ORGANIZATION, or DATE. Ensure the output strictly follows the format:\
+  \ label: entity $$ label: entity, with each unique entity on a separate label line,\
+  \ avoiding grouped entities (e.g., avoid LOC: entity, entity) or irrelevant entries\
+  \ like none. Return only the output \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_zu_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_3/utils.py b/lm_eval/tasks/afrobench/masakhaner/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..76909044e7f35948156f8bb506ce2fce563ec689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_3/utils.py
@@ -0,0 +1,146 @@
+import collections
+import re
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Build the reference string for ``doc`` from its ``ner_tags`` field.
+
+    The raw ``doc["ner_tags"]`` value is handed to ``transform_text``, which
+    turns it into the ``LABEL: entity $$ LABEL: entity`` target format.
+    """
+    raw_tags = doc["ner_tags"]
+    return transform_text(raw_tags)
+
+
+def transform_text(text):
+    """Collapse token-level BIO annotations into a ``LABEL: entity`` string.
+
+    Args:
+        text: Newline-separated ``word: TAG`` pairs, where ``TAG`` is a BIO
+            tag such as ``B-PER``, ``I-PER`` or ``O``. Blank and
+            whitespace-only lines are skipped.
+
+    Returns:
+        The collected entities, each formatted as ``LABEL: entity`` and joined
+        with ``" $$ "``; an empty string when no entity is found. Words are
+        lower-cased and stripped of surrounding commas/periods, and the words
+        of a multi-word entity are separated by a single space.
+
+    Raises:
+        ValueError: If a non-blank line does not split into exactly one word
+            and one tag on the ``": "`` separator.
+    """
+    entities = []
+    current_entity = ""
+    current_tag = ""
+
+    for raw_line in text.split("\n"):
+        line = raw_line.strip()
+        if not line:
+            # Whitespace-only lines carry no annotation; skipping them avoids
+            # an unpacking error on the split below.
+            continue
+
+        word, tag = line.split(": ")
+        tag = tag.upper()
+        word = word.lower().strip(",.").strip()
+
+        if tag.startswith("B-"):
+            # A new entity starts: emit the one collected so far, if any.
+            if current_entity:
+                entities.append(f"{current_tag}: {current_entity}")
+            current_tag = tag.split("-")[1]
+            current_entity = word
+        elif tag.startswith("I-") and tag.split("-")[1] == current_tag:
+            # Continuation of the open entity. Keep the words space-separated
+            # so "john" + "smith" becomes "john smith", not "johnsmith".
+            current_entity = f"{current_entity} {word}".strip()
+        else:
+            # "O" tag, or an I- tag that does not continue the open entity:
+            # close whatever was being collected and drop this word.
+            if current_entity:
+                entities.append(f"{current_tag}: {current_entity}")
+                current_entity = ""
+                current_tag = ""
+
+    # Emit the entity that was still open when the input ended.
+    if current_entity:
+        entities.append(f"{current_tag}: {current_entity}")
+
+    # Join all the transformed entities with $$ as separator.
+    return " $$ ".join(entities)
+
+
+def span_f1_agg(items):
+    """Compute a micro-averaged span-level F1 over (target, prediction) pairs.
+
+    Adapted from the span F1 metric in
+    https://github.com/google-research/multilingual-t5/blob/master/multilingual_t5/evaluation/metrics.py
+
+    Args:
+        items: Iterable of ``(target, prediction)`` pairs. Each member is a
+            string in the ``label: entity $$ label: entity`` format, or a list
+            of such strings (its elements are joined with spaces first).
+
+    Returns:
+        The F1 score as a float, computed from the true positives, false
+        positives and false negatives pooled over all pairs and labels.
+        Returns ``0.0`` when ``items`` is empty.
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:
+        # No examples at all: nothing was predicted and nothing was missed.
+        return 0.0
+    targets = unzipped_list[0]
+    predictions = unzipped_list[1]
+
+    # Patterns are compiled once here instead of on every normalisation call.
+    my_punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~•@.""-,`'
+    punctuation_re = re.compile("[" + my_punctuation + "]+")
+    articles_re = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    wide_gap_re = re.compile(r"\s{3,}|\t")
+    whitespace_re = re.compile(r"\s+")
+
+    def normalize_text(text):
+        """Replace punctuation and articles with spaces, tidy gaps, lower-case."""
+        text = punctuation_re.sub(" ", str(text))
+        # Articles are removed before lower-casing, so only lower-case
+        # "a"/"an"/"the" are dropped; this ordering is kept as it was.
+        text = articles_re.sub(" ", text)
+        # Runs of three or more whitespace characters (and tabs) are deleted
+        # outright; any remaining whitespace run collapses to one space.
+        text = wide_gap_re.sub("", text)
+        text = whitespace_re.sub(" ", text)
+        return text.lower()
+
+    def tags_to_spans(tag_sequence, delimiter="$$"):
+        """Parse a ``label: entity`` sequence into ``(label, entity)`` tuples."""
+        if isinstance(tag_sequence, list):
+            tag_sequence = " ".join(i.strip() for i in tag_sequence)
+        # Split on the "$$" delimiter, tolerating a stray single "$" as well.
+        chunks = [
+            item.strip()
+            for sub in tag_sequence.strip().split(delimiter)
+            for item in sub.split("$")
+            if item
+        ]
+        # Also break up entries that were grouped with ". " or ", ".
+        chunks = [
+            item.strip()
+            for value in chunks
+            for sub in value.split(". ")
+            for item in sub.split(", ")
+        ]
+        tags_entities = []
+        for tag_entity in chunks:
+            tag_entity_split = tag_entity.split(": ")
+            if len(tag_entity_split) != 2:
+                # Not a well-formed "label: entity" entry; ignore it.
+                continue
+            tag = normalize_text(tag_entity_split[0].strip())
+            entity = normalize_text(tag_entity_split[1].strip())
+            tags_entities.append((tag, entity))
+        return tags_entities
+
+    def compute_f1_metrics(true_positive, false_positive, false_negative):
+        # The 1e-13 terms keep the divisions defined when a count is zero.
+        precision = float(true_positive) / float(true_positive + false_positive + 1e-13)
+        recall = float(true_positive) / float(true_positive + false_negative + 1e-13)
+        f1_measures = 2.0 * ((precision * recall) / (precision + recall + 1e-13))
+        return precision, recall, f1_measures
+
+    # Only the pooled totals are needed, so plain counters are enough.
+    true_positives = 0
+    false_positives = 0
+    false_negatives = 0
+
+    for target, pred in zip(targets, predictions):
+        gold_spans = tags_to_spans(target)
+        predicted_spans = tags_to_spans(pred)
+
+        for span in predicted_spans:
+            if span in gold_spans:
+                true_positives += 1
+                # Each gold span can be matched by one prediction only.
+                gold_spans.remove(span)
+            else:
+                false_positives += 1
+        # These spans weren't predicted.
+        false_negatives += len(gold_spans)
+
+    _, _, f1_measure = compute_f1_metrics(
+        true_positives, false_positives, false_negatives
+    )
+    return f1_measure
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner
new file mode 100644
index 0000000000000000000000000000000000000000..5c0ae52e62da27b202b794918ac568747195de34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner
@@ -0,0 +1,26 @@
+tag:
+- masakhaner_tasks
+- masakhaner_prompt_4
+dataset_path: masakhane/masakhaner-x
+dataset_name: null  # set by each masakhaner_<lang>.yaml that includes this file
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:  # generation stops at the first of these strings
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: target  # NOTE(review): presumably the dataset column holding the gold "label: entity $$ ..." string - confirm
+filter_list:
+ - name: flexible-extract
+   filter:
+     - function: format_span
+metric_list:
+  - metric: f1
+    aggregation: !function utils.span_f1_agg
+    higher_is_better: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_am.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19b06221f2366dbeb7418961ecbf00f6b1146f1d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_am.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: am
+doc_to_text: "As a Amharic linguist, label all named entities in the Amharic text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_am_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bbj.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..03ed5210a03e4ce5bc9afde45166519152a153c7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bbj.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "As a Ghomala linguist, label all named entities in the Ghomala text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bbj_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e719db9ac9f7ffb2011e7af1f12bf6319cb1b9cc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_bm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bm
+doc_to_text: "As a Bambara linguist, label all named entities in the Bambara text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bm_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ee.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe5fc75d28eef016bac579673cf1db875ee0a9f3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ee.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ee
+doc_to_text: "As a Ewe linguist, label all named entities in the Ewe text below with\
+  \ the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ee_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ha.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f88b9d19d4545c1131573a2303c6014421ed54b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ha.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "As a Hausa linguist, label all named entities in the Hausa text below\
+  \ with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ha_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ig.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ig.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4712d7e8edd36a87c59c9f0bc759f7b884ec830
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ig.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "As a Igbo linguist, label all named entities in the Igbo text below\
+  \ with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ig_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_lg.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_lg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd7bde4a6f098bc8e946d83b556a00e38de43a5b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_lg.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lg
+doc_to_text: "As a Luganda linguist, label all named entities in the Luganda text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_lg_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_luo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..92c0ddfa2c58c90a9da84e3dd3e002f9eb8c1098
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_luo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "As a Luo linguist, label all named entities in the Luo text below with\
+  \ the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_luo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_mos.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2eb75d8e554dba222cf9f92fc6c8f013e7d232b2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_mos.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "As a Mossi linguist, label all named entities in the Mossi text below\
+  \ with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_mos_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ny.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ny.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8cb8218aff23a34f48455b2cbb897112a407f06
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_ny.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ny
+doc_to_text: "As a Chichewa linguist, label all named entities in the Chichewa text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ny_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_pcm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93f8ae3adb27f74df18c13a0ed886d176b62eead
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_pcm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "As a Nigerian Pidgin linguist, label all named entities in the Nigerian\
+  \ Pidgin text below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE.\
+  \ Ensure the output strictly follows the format: label: entity $$ label: entity,\
+  \ with each unique entity on a separate label line, avoiding grouped entities (e.g.,\
+  \ avoid LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_pcm_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_rw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_rw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d64d49925bf1668e0f2b5ebdb6516f4ba0668f88
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_rw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: rw
+doc_to_text: "As a Kinyarwanda linguist, label all named entities in the Kinyarwanda\
+  \ text below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure\
+  \ the output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_rw_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40230fb1cebb4f1aa9d9001360389ef1cdfda64e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sn.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sn
+doc_to_text: "As a chiShona linguist, label all named entities in the chiShona text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sn_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b27554ddd1faa7a796b5249c340b4204fdfaa5a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_sw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sw
+doc_to_text: "As a Kiswahili linguist, label all named entities in the Kiswahili text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sw_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..88080456ba554e20a63f3c68638908b31d5294cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tn.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tn
+doc_to_text: "As a Setswana linguist, label all named entities in the Setswana text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tn_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d2eec6befd2442e314abba2e2655dc8aa0baf4a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_tw.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: tw
+doc_to_text: "As a Twi linguist, label all named entities in the Twi text below with\
+  \ the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output strictly\
+  \ follows the format: label: entity $$ label: entity, with each unique entity on\
+  \ a separate label line, avoiding grouped entities (e.g., avoid LOC: entity, entity)\
+  \ or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tw_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_wo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_wo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41501cb385fe50b38d5d30afe592da4705585881
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_wo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: wo
+doc_to_text: "As a Wolof linguist, label all named entities in the Wolof text below\
+  \ with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_wo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_xh.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_xh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b29fda3f444dd526ee7cc94f6579e74b6f63b97
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_xh.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: xh
+doc_to_text: "As a isiXhosa linguist, label all named entities in the isiXhosa text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_xh_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_yo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0c327bd53c90651e9c7e6b699d3fb9fb52748a1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_yo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "As a Yoruba linguist, label all named entities in the Yoruba text below\
+  \ with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output. \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_yo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_zu.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24961ec759862a77b2ce608f4a5954ec62f139fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/masakhaner_zu.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: zu
+doc_to_text: "As a isiZulu linguist, label all named entities in the isiZulu text\
+  \ below with the categories: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the\
+  \ output strictly follows the format: label: entity $$ label: entity, with each\
+  \ unique entity on a separate label line, avoiding grouped entities (e.g., avoid\
+  \ LOC: entity, entity) or irrelevant entries like none. Return only the output.\
+  \ \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_zu_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_4/utils.py b/lm_eval/tasks/afrobench/masakhaner/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..76909044e7f35948156f8bb506ce2fce563ec689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_4/utils.py
@@ -0,0 +1,146 @@
+import collections
+import re
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):  # gold "TAG: entity $$ ..." string built from doc["ner_tags"]
+    return transform_text(doc["ner_tags"])
+
+
+def transform_text(text):
+    """Collapse "word: TAG" lines (BIO tags) into "TAG: entity $$ TAG: entity".
+
+    Words are lower-cased and stripped of surrounding commas/periods; the words
+    of one entity are joined with single spaces. Raises ValueError on a non-empty
+    line that has no ": " separator.
+    """
+    entities, current_words, current_tag = [], [], ""
+    for pair in text.split("\n"):
+        if not pair:  # skip empty lines
+            continue
+        # Split on the last ": " so a word that itself contains ": " still parses.
+        word, tag = pair.strip().rsplit(": ", 1)
+        tag = tag.upper()
+        word = word.lower().strip(",.").strip()
+        if tag.startswith("I-") and current_tag and tag.split("-")[1] == current_tag:
+            if word:  # punctuation-only tokens add nothing to the entity text
+                current_words.append(word)
+            continue
+        # Any other tag ends the entity being built (if any).
+        if current_words:
+            entities.append(f"{current_tag}: {' '.join(current_words)}")
+        current_words, current_tag = [], ""
+        if tag.startswith("B-"):
+            current_words, current_tag = ([word] if word else []), tag.split("-")[1]
+    if current_words:
+        entities.append(f"{current_tag}: {' '.join(current_words)}")
+    # Join all the transformed output lines with $$ as separator
+    return " $$ ".join(entities)
+
+
+def span_f1_agg(items):
+    """Compute a corpus-level span F1 over (target, prediction) pairs.
+
+    Each target and prediction is parsed into normalized (label, entity) tuples.
+    A predicted tuple counts as a true positive when it equals a gold tuple of
+    the same sample that has not been matched yet; counts are pooled over all
+    samples and labels before precision, recall and F1 are computed.
+
+    Adapted from
+    https://github.com/google-research/multilingual-t5/blob/master/multilingual_t5/evaluation/metrics.py
+
+    Args:
+    items: iterable of (target, prediction) pairs; each element is a string of
+      "label: entity" chunks separated by "$$", or a list of such strings.
+
+    Returns:
+    pooled span F1 as a float (0.0 when `items` is empty).
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:  # nothing to score; indexing below would raise IndexError
+        return 0.0
+    targets, predictions = unzipped_list[0], unzipped_list[1]
+
+    true_positives = collections.defaultdict(int)
+    false_positives = collections.defaultdict(int)
+    false_negatives = collections.defaultdict(int)
+
+    # Patterns are built once per call instead of on every normalize_text call.
+    my_punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~•@.""-,`'
+    punctuation_pattern = re.compile("[" + my_punctuation + "]+")
+    articles_pattern = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    blank_spaces_pattern = re.compile(r"\s{3,}|\t")
+    # Raw string: "\s" in a plain str literal is an invalid escape sequence.
+    whitespace_pattern = re.compile(r"\s+")
+
+    def normalize_text(strings):
+        """Strip punctuation and articles, squeeze whitespace, then lowercase."""
+        strings = punctuation_pattern.sub(" ", str(strings))
+        # NOTE(review): articles are matched case-sensitively *before* lowercasing,
+        # so a capitalised "The" survives; order kept so existing scores don't move.
+        strings = articles_pattern.sub(" ", strings)
+        # Runs of 3+ whitespace chars and tabs are deleted; the rest is squeezed.
+        strings = blank_spaces_pattern.sub("", strings)
+        strings = whitespace_pattern.sub(" ", strings)
+        return strings.lower()
+
+    def tags_to_spans(tag_sequence, delimiter="$$"):
+        """Parse "label: entity" chunks into normalized (label, entity) tuples.
+
+        A list input is first joined into one string with spaces.
+        """
+        if isinstance(tag_sequence, list):
+            tag_sequence = " ".join(i.strip() for i in tag_sequence)
+        # Split on the delimiter, then on any leftover single "$".
+        tag_sequence_split = [
+            item.strip()
+            for sub in tag_sequence.strip().split(delimiter)
+            for item in sub.split("$")
+            if item
+        ]
+        # Chunks grouped with ". " or ", " are broken up as well.
+        tag_sequence_split = [
+            item.strip()
+            for value in tag_sequence_split
+            for sub in value.split(". ")
+            for item in sub.split(", ")
+        ]
+        tags_entities = []
+        for tag_entity in tag_sequence_split:
+            tag_entity_split = tag_entity.split(": ")
+            if len(tag_entity_split) != 2:
+                continue  # not exactly one "label: entity" pair
+            tag = normalize_text(tag_entity_split[0].strip())
+            entity = normalize_text(tag_entity_split[1].strip())
+            tags_entities.append((tag, entity))
+        return tags_entities
+
+    def compute_f1_metrics(true_positive, false_positive, false_negative):
+        """Return (precision, recall, F1); the 1e-13 terms avoid division by zero."""
+        precision = float(true_positive) / float(true_positive + false_positive + 1e-13)
+        recall = float(true_positive) / float(true_positive + false_negative + 1e-13)
+        f1_measures = 2.0 * ((precision * recall) / (precision + recall + 1e-13))
+        return precision, recall, f1_measures
+
+    # Accumulate per-label counts sample by sample.
+    for target, pred in zip(targets, predictions):
+        gold_spans = tags_to_spans(target)
+        predicted_spans = tags_to_spans(pred)
+
+        for span in predicted_spans:
+            if span in gold_spans:
+                true_positives[span[0]] += 1
+                gold_spans.remove(span)  # a gold span can be matched only once
+            else:
+                false_positives[span[0]] += 1
+        # These spans weren't predicted.
+        for span in gold_spans:
+            false_negatives[span[0]] += 1
+
+    _, _, f1_measure = compute_f1_metrics(
+        sum(true_positives.values()),
+        sum(false_positives.values()),
+        sum(false_negatives.values()),
+    )
+    return f1_measure
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner
new file mode 100644
index 0000000000000000000000000000000000000000..09cd77e13106cca8862dcfa31b86c7742b97985a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner
@@ -0,0 +1,26 @@
+tag:
+- masakhaner_tasks
+- masakhaner_prompt_5
+dataset_path: masakhane/masakhaner-x
+dataset_name: null  # set by each masakhaner_<lang>.yaml that includes this file
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:  # generation stops at the first of these strings
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: target  # NOTE(review): presumably the dataset column holding the gold "label: entity $$ ..." string - confirm
+filter_list:
+ - name: flexible-extract
+   filter:
+     - function: format_span
+metric_list:
+  - metric: f1
+    aggregation: !function utils.span_f1_agg
+    higher_is_better: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_am.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90c485745377aa9532eb0f6e7b35b15cd31d5414
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_am.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: am
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_am_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bbj.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74726694ef67fd5f572c1a58b0b637cf410a9997
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bbj.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bbj_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c97e0c22a609ceb20a40b5185a58baad850e4e81
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_bm.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: bm
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_bm_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ee.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ee.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6371649d2db361bfda7fabe8ed353ca529324375
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ee.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ee
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ee_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ha.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d68c7eed339d51ba9d72863624a2ee9842be64e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ha.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ha_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ig.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ig.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b8a429593210e2db50535011174cdbff26ad9c2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ig.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ig_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_lg.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_lg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84bdc8b9af8e76d09da546db020390531450ed85
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_lg.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: lg
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_lg_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_luo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55a0b5744cdaa4cf894ac882642978f66c7dbe5f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_luo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_luo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_mos.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..06bcc4467d43d8559b8e9b2cd4b0a89fa6b3fa40
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_mos.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_mos_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ny.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ny.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e400f10e186f0e4ae6119fa622065a23da73c680
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_ny.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: ny
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_ny_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_pcm.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9d897bc01a217c01d9fbffdf496d060ec54b434
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_pcm.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_pcm_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_rw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_rw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0742bc4dd567005a51861bab9b6208d64c66166f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_rw.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: rw
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_rw_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..56711335c8c11879338fbc3b245bc49d0702733e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sn.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sn
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sn_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c418beb45612e31ff1738909d5d1c181bfbab079
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_sw.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: sw
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_sw_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tn.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf94a1081352827a7fe09eb913bfabd9e5f0c576
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tn.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: tn
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tn_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tw.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cad2e2e3e64819dc6ab3151929a6ec76bb868821
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_tw.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: tw
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_tw_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_wo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_wo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec7af039234047cda3500be81b361bae294bace2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_wo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: wo
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_wo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_xh.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_xh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..debb164aef161a178ca53046e9b674a677d5fc08
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_xh.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: xh
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_xh_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_yo.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9abe1acbcb473c1bd091277c8a1913c792fdd0a3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_yo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_yo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_zu.yaml b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5af591aa464ef88ff6a4d4b62e55c504cb777c4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/masakhaner_zu.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: zu
+doc_to_text: "Provide a concise list of named entities in the text below. Use the\
+  \ following labels: PERSON, LOCATION, ORGANIZATION, and DATE. Ensure the output\
+  \ strictly follows the format: label: entity $$ label: entity, with each unique\
+  \ entity on a separate label line, avoiding grouped entities (e.g., avoid LOC: entity,\
+  \ entity) or irrelevant entries like none. Return only the output.  \n\nText: {{text}}"
+include: masakhaner
+task: masakhaner_zu_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhaner/prompt_5/utils.py b/lm_eval/tasks/afrobench/masakhaner/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..76909044e7f35948156f8bb506ce2fce563ec689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhaner/prompt_5/utils.py
@@ -0,0 +1,146 @@
+import collections
+import re
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):  # gold target: the doc's "ner_tags" text run through transform_text
+    return transform_text(doc["ner_tags"])
+
+
+def transform_text(text):
+    """Merge "word: B-TYPE" / "word: I-TYPE" lines into "TYPE: entity $$ TYPE: entity".
+
+    Words are lower-cased and stripped of surrounding "," / "." before merging.
+    Raises ValueError when a non-blank line has no ": " separator.
+    """
+    entities = []
+    current_entity = current_tag = ""
+    for pair in text.split("\n"):
+        pair = pair.strip()
+        if not pair:  # blank or whitespace-only line
+            continue
+        # Split on the last ": " so a token that itself contains ": " still parses.
+        word, tag = pair.rsplit(": ", 1)
+        tag = tag.upper()
+        word = word.lower().strip(",.").strip()
+        if tag.startswith("B-"):  # a new entity begins; emit the one in progress
+            if current_entity:
+                entities.append(f"{current_tag}: {current_entity}")
+            current_tag, current_entity = tag.split("-")[1], word
+        elif tag.startswith("I-") and tag.split("-")[1] == current_tag:
+            # NOTE(review): words are joined with no space between them; confirm intended.
+            current_entity += word
+        elif current_entity:  # any other tag closes the entity in progress
+            entities.append(f"{current_tag}: {current_entity}")
+            current_entity = current_tag = ""
+    if current_entity:  # input ended while an entity was still open
+        entities.append(f"{current_tag}: {current_entity}")
+    return " $$ ".join(entities)
+
+
+def span_f1_agg(items):
+    """Compute a micro-averaged span-level F1 over (target, prediction) pairs.
+
+    Based on
+    https://github.com/google-research/multilingual-t5/blob/master/multilingual_t5/evaluation/metrics.py
+
+    A predicted span counts as a true positive only when its normalized
+    (label, entity) tuple equals a not-yet-matched gold span of the same pair.
+
+    Args:
+        items: iterable of ``(target, prediction)`` pairs. Each element is a
+            string of ``label: entity`` spans separated by ``$$``, or a list
+            of such strings (joined with spaces before parsing).
+
+    Returns:
+        The F1 score as a float, pooled over every span of every pair;
+        ``0.0`` when ``items`` is empty.
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:  # no pairs at all, so there is nothing to score
+        return 0.0
+    targets = unzipped_list[0]
+    predictions = unzipped_list[1]
+
+    true_positives = collections.defaultdict(int)
+    false_positives = collections.defaultdict(int)
+    false_negatives = collections.defaultdict(int)
+
+    # Patterns are compiled once per call rather than once per normalized string.
+    my_punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~•@.""-,`'
+    punctuation_pattern = re.compile("[" + my_punctuation + "]+")
+    articles_pattern = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+    blank_spaces_pattern = re.compile(r"\s{3,}|\t")
+    # Raw string: "\s" in a plain string literal is an invalid escape sequence.
+    whitespace_pattern = re.compile(r"\s+")
+
+    def normalize_text(strings):
+        """Replace punctuation and articles with spaces, squeeze whitespace, lowercase."""
+        strings = punctuation_pattern.sub(" ", str(strings))
+        # Articles are matched before lowercasing, so only lowercase a/an/the go.
+        strings = articles_pattern.sub(" ", strings)
+        # Tabs and runs of 3+ whitespace characters are deleted outright ...
+        strings = blank_spaces_pattern.sub("", strings)
+        # ... and whatever whitespace remains collapses to single spaces.
+        strings = whitespace_pattern.sub(" ", strings)
+        return strings.lower()
+
+    def tags_to_spans(tag_sequence, delimiter="$$"):
+        """Parse ``label: entity`` text into normalized ``(label, entity)`` tuples."""
+        if isinstance(tag_sequence, list):
+            tag_sequence = " ".join(i.strip() for i in tag_sequence)
+        # Split on the delimiter, then on any stray single "$".
+        tag_sequence_split = [
+            item.strip()
+            for sub in tag_sequence.strip().split(delimiter)
+            for item in sub.split("$")
+            if item
+        ]
+        # Also break apart pairs that were separated by ". " or ", " instead.
+        tag_sequence_split = [
+            item.strip()
+            for value in tag_sequence_split
+            for sub in value.split(". ")
+            for item in sub.split(", ")
+        ]
+
+        tags_entities = []
+        for tag_entity in tag_sequence_split:
+            tag_entity_split = tag_entity.split(": ")
+            if len(tag_entity_split) != 2:  # not exactly one "label: entity" pair
+                continue
+            tag = normalize_text(tag_entity_split[0].strip())
+            entity = normalize_text(tag_entity_split[1].strip())
+            tags_entities.append((tag, entity))
+        return tags_entities
+
+    def compute_f1_metrics(true_positive, false_positive, false_negative):
+        """Return (precision, recall, F1); the 1e-13 terms avoid division by zero."""
+        precision = float(true_positive) / float(true_positive + false_positive + 1e-13)
+        recall = float(true_positive) / float(true_positive + false_negative + 1e-13)
+        f1_measures = 2.0 * ((precision * recall) / (precision + recall + 1e-13))
+        return precision, recall, f1_measures
+
+    for target, pred in zip(targets, predictions):
+        gold_spans = tags_to_spans(target)
+        predicted_spans = tags_to_spans(pred)
+
+        for span in predicted_spans:
+            if span in gold_spans:
+                true_positives[span[0]] += 1
+                # Consume the match so a repeated prediction cannot hit it twice.
+                gold_spans.remove(span)
+            else:
+                false_positives[span[0]] += 1
+        # These spans weren't predicted.
+        for span in gold_spans:
+            false_negatives[span[0]] += 1
+
+    # Pool the per-label counts into a single score.
+    _, _, f1_measure = compute_f1_metrics(
+        sum(true_positives.values()),
+        sum(false_positives.values()),
+        sum(false_negatives.values()),
+    )
+    return f1_measure
diff --git a/lm_eval/tasks/afrobench/masakhanews/README.md b/lm_eval/tasks/afrobench/masakhanews/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..16df2df1d62f2d83d6d34e22373d6680a246eaa8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/README.md
@@ -0,0 +1,99 @@
+# MasakhaNEWS
+
+## Paper
+Title: `MasakhaNEWS: News Topic Classification for African languages`
+
+Paper Link: https://aclanthology.org/2023.ijcnlp-main.10/
+
+## Abstract
+>African languages are severely under-represented in NLP research due to lack of datasets covering several NLP tasks. While there are individual language specific datasets that are being expanded to different tasks, only a handful of NLP tasks (e.g. named entity recognition and machine translation) have standardized benchmark datasets covering several geographical and typologically-diverse African languages. In this paper, we develop MasakhaNEWS -- a new benchmark dataset for news topic classification covering 16 languages widely spoken in Africa. We provide an evaluation of baseline models by training classical machine learning models and fine-tuning several language models. Furthermore, we explore several alternatives to full fine-tuning of language models that are better suited for zero-shot and few-shot learning such as cross-lingual parameter-efficient fine-tuning (like MAD-X), pattern exploiting training (PET), prompting language models (like ChatGPT), and prompt-free sentence transformer fine-tuning (SetFit and Cohere Embedding API). Our evaluation in zero-shot setting shows the potential of prompting ChatGPT for news topic classification in low-resource African languages, achieving an average performance of 70 F1 points without leveraging additional supervision like MAD-X. In few-shot setting, we show that with as little as 10 examples per label, we achieved more than 90% (i.e. 86.0 F1 points) of the performance of full supervised training (92.6 F1 points) leveraging the PET approach.
+
+Homepage: https://github.com/masakhane-io/masakhane-news
+
+### Citation
+
+```
+@inproceedings{adelani-etal-2023-masakhanews,
+    title = "{M}asakha{NEWS}: News Topic Classification for {A}frican languages",
+    author = "Adelani, David Ifeoluwa  and
+      Masiak, Marek  and
+      Azime, Israel Abebe  and
+      Alabi, Jesujoba  and
+      Tonja, Atnafu Lambebo  and
+      Mwase, Christine  and
+      Ogundepo, Odunayo  and
+      Dossou, Bonaventure F. P.  and
+      Oladipo, Akintunde  and
+      Nixdorf, Doreen  and
+      Emezue, Chris Chinenye  and
+      Al-azzawi, Sana  and
+      Sibanda, Blessing  and
+      David, Davis  and
+      Ndolela, Lolwethu  and
+      Mukiibi, Jonathan  and
+      Ajayi, Tunde  and
+      Moteu, Tatiana  and
+      Odhiambo, Brian  and
+      Owodunni, Abraham  and
+      Obiefuna, Nnaemeka  and
+      Mohamed, Muhidin  and
+      Muhammad, Shamsuddeen Hassan  and
+      Ababu, Teshome Mulugeta  and
+      Salahudeen, Saheed Abdullahi  and
+      Yigezu, Mesay Gemeda  and
+      Gwadabe, Tajuddeen  and
+      Abdulmumin, Idris  and
+      Taye, Mahlet  and
+      Awoyomi, Oluwabusayo  and
+      Shode, Iyanuoluwa  and
+      Adelani, Tolulope  and
+      Abdulganiyu, Habiba  and
+      Omotayo, Abdul-Hakeem  and
+      Adeeko, Adetola  and
+      Afolabi, Abeeb  and
+      Aremu, Anuoluwapo  and
+      Samuel, Olanrewaju  and
+      Siro, Clemencia  and
+      Kimotho, Wangari  and
+      Ogbu, Onyekachi  and
+      Mbonu, Chinedu  and
+      Chukwuneke, Chiamaka  and
+      Fanijo, Samuel  and
+      Ojo, Jessica  and
+      Awosan, Oyinkansola  and
+      Kebede, Tadesse  and
+      Sakayo, Toadoum Sari  and
+      Nyatsine, Pamela  and
+      Sidume, Freedmore  and
+      Yousuf, Oreen  and
+      Oduwole, Mardiyyah  and
+      Tshinu, Kanda  and
+      Kimanuka, Ussen  and
+      Diko, Thina  and
+      Nxakama, Siyanda  and
+      Nigusse, Sinodos  and
+      Johar, Abdulmejid  and
+      Mohamed, Shafie  and
+      Hassan, Fuad Mire  and
+      Mehamed, Moges Ahmed  and
+      Ngabire, Evrard  and
+      Jules, Jules  and
+      Ssenkungu, Ivan  and
+      Stenetorp, Pontus",
+    editor = "Park, Jong C.  and
+      Arase, Yuki  and
+      Hu, Baotian  and
+      Lu, Wei  and
+      Wijaya, Derry  and
+      Purwarianti, Ayu  and
+      Krisnadhi, Adila Alfa",
+    booktitle = "Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
+    month = nov,
+    year = "2023",
+    address = "Nusa Dua, Bali",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.ijcnlp-main.10/",
+    doi = "10.18653/v1/2023.ijcnlp-main.10",
+    pages = "144--159"
+}
+```
diff --git a/lm_eval/tasks/afrobench/masakhanews/masakhanews.yaml b/lm_eval/tasks/afrobench/masakhanews/masakhanews.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93b6f29d8cdc05cf0904e4a8fe9afdd35d111c88
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/masakhanews.yaml
@@ -0,0 +1,13 @@
+group: masakhanews
+task:
+  - masakhanews_prompt_1
+  - masakhanews_prompt_2
+  - masakhanews_prompt_3
+  - masakhanews_prompt_4
+  - masakhanews_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews
new file mode 100644
index 0000000000000000000000000000000000000000..282a38422e526b9f8ce8731f950ae04e2a6cbf08
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews
@@ -0,0 +1,43 @@
+tag:
+- masakhanews_tasks
+- masakhanews_prompt_1
+- afrobench_TC_tasks
+dataset_path: masakhane/masakhanews
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "business"
+    - "entertainment"
+    - "health"
+    - "politics"
+    - "religion"
+    - "sports"
+    - "technology"
+should_decontaminate: true
+doc_to_decontamination_query: headline_text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_amh.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d45b784facda2e60335d1980b9a1038c5cb91ec0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_eng.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40685c17d8616dafb3797ab84153f77242e6a364
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_fra.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2371172156b0cfb8532d93366518f2faf8793aed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_fra_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_hau.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7288982d35eaaad09407c59b6b6d02af3fb637a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_ibo.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4bf65cca861c670be75aa9b63a914590d4d996c4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lin.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6cdbe8de5ef1593bd2f98d75ec4ecca3fc33084
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_lin_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lug.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2f0ec1bada04bf7c72649621330e828bd449951
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_orm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9bff1ac5113f06c5d035e46c0e139fbeb0a8d28
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_orm_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_pcm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..119b01bb158d15d307949226dc71a055d169e4fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_pcm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_run.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8bc2923fa069f71d5bb00d4f272c235d6e7f0c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_run.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_run_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_sna.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee4fabdc96a680e0482040f00daca794eaf87dfe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_som.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..88d7774c1b4c1c08367d8ebc79bc1ca7214cab9f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_som.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_som_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_swa.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4e02aae282bdba08b8f057c71ab580a7ee9c031
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_tir.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72fa30ae7012379011b04a1fb3255fcad2a9a4e7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_tir.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_tir_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_xho.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d98b3b681de265b99207db0d3d2347d952d05a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_yor.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3ef4eec0e46425b9b02771ee6a360f3498cc0a1a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/masakhanews_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Given the categories technology, business, politics, sports, health,\
+  \ entertainment, or religion; what category does the text: '{{headline_text}}' belong\
+  \ to: \n\n"
+include: masakhanews
+task: masakhanews_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_1/utils.py b/lm_eval/tasks/afrobench/masakhanews/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews
new file mode 100644
index 0000000000000000000000000000000000000000..c174d2c7ff991b749881260b1ccb93d63a5e9f26
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews
@@ -0,0 +1,43 @@
+tag:
+- masakhanews_tasks
+- masakhanews_prompt_2
+- afrobench_TC_tasks
+dataset_path: masakhane/masakhanews
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "business"
+    - "entertainment"
+    - "health"
+    - "politics"
+    - "religion"
+    - "sports"
+    - "technology"
+should_decontaminate: true
+doc_to_decontamination_query: headline_text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_amh.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cee7619cfb4c215a9e05551b668d2ea4e9e517ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_amh.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: 'Does this Amharic topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_eng.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3d6dd16c461eb7df5b730aa3283f6695f130503
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_eng.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: 'Does this English topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_fra.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c35a6a1d34753893185c2d0fcd5dd82e73853e35
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_fra.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: 'Does this French topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_fra_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_hau.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93e9cc5a7f3e53fd8fc97ee1ecf8bfbb85911939
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_hau.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Does this Hausa topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_ibo.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1638e435c76f6fdd671a6bf165d81debc4be3b3a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_ibo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Does this Igbo topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lin.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0010d0e1ad36356f8c5b9bccccffc62f46730d93
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lin.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: 'Does this Lingala topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_lin_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lug.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d526067289d1145ed494418408b0de55d51a74ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_lug.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: 'Does this Luganda topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_orm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd04c845d4eb1d46dd48052f0a582d7557e610a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_orm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: 'Does this Afaan Oromoo topic; ''{{headline_text}}'' belong to one of
+  the following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_orm_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_pcm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de685e3ac8a62a2e5ca5b3d0b16119a77caa1994
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_pcm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: 'Does this Nigerian Pidgin topic; ''{{headline_text}}'' belong to one
+  of the following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_run.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62236d590bfc009043dd8dca6ab3c43edcaff995
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_run.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: run
+doc_to_text: 'Does this Kirundi topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_run_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_sna.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a97e176b865335804af5efe9e911894acbbcc78
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_sna.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: 'Does this Shona topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_som.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..318b9b87beabeef3837c826cbf128b3b8d1b4e8d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_som.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: som
+doc_to_text: 'Does this Somali topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_som_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_swa.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75b9345f3229c6e105f4cd99e65a970c1061b7d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_swa.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: 'Does this Swahili topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_tir.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..258a2bd3d7431083088e09f74044ce518cbaa7b1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_tir.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: 'Does this Tigrinya topic; ''{{headline_text}}'' belong to one of the
+  following categories: technology, business, politics, sports, health, entertainment,
+  or religion? category only
+
+
+  '
+include: masakhanews
+task: masakhanews_tir_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_xho.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30c4c3ac3abdb3b239406a2d36efc9331b1597d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_xho.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: 'Does this Xhosa topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_yor.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..067cf10632de430b650053d5537a733052e34b09
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/masakhanews_yor.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Does this Yoruba topic; ''{{headline_text}}'' belong to one of the following
+  categories: technology, business, politics, sports, health, entertainment, or religion?
+  category only
+
+
+  '
+include: masakhanews
+task: masakhanews_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_2/utils.py b/lm_eval/tasks/afrobench/masakhanews/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews
new file mode 100644
index 0000000000000000000000000000000000000000..ecc2108967078bb24a1efd15acdd8387d47e173c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews
@@ -0,0 +1,43 @@
+tag:
+- masakhanews_tasks
+- masakhanews_prompt_3
+- afrobench_TC_tasks
+dataset_path: masakhane/masakhanews
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "business"
+    - "entertainment"
+    - "health"
+    - "politics"
+    - "religion"
+    - "sports"
+    - "technology"
+should_decontaminate: true
+doc_to_decontamination_query: headline_text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_amh.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dec10d2963dd00fce7e8dbd7f24f8a61a178e0a7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_amh.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Amharic statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_amh_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_eng.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a8b7159e215dd6bc5a766d51b06f77289e4ce1a7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_eng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the English statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_fra.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..328316a8361d29a4db6ab882b46944fc65b2ff9b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_fra.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the French statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_fra_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_hau.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c18ff2779cc9a9d149afe1eb7c438e3d18e8af2c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Hausa statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_ibo.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a91db840f2b72f041cffb827ea87520e28434cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Igbo statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lin.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19c4cca2e5f65851b6c44a1baa6dd2842ce3bd5f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Lingala statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_lin_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lug.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e3d4319fc82762f63292da9f916166132e53a42
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_lug.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Luganda statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_orm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bacf0420b81df236351a3698e37cf3eca8983e7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_orm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Afaan Oromoo statement below? Return only the category.\
+  \ \n\ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_orm_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_pcm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e873becd56c21cd0d92841994a4fb6bed5119b51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_pcm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Nigerian Pidgin statement below? Return only the category.\
+  \ \n\ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_pcm_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_run.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..307e13710dde72132a0df4011500aca4ccfd9e22
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_run.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: run
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Kirundi statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_run_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_sna.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee69be3de11efe89bac3dd355f4a9ffe99206c37
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_sna.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Shona statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_som.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c181fddb819a9f42f18a31141bc71f837f759cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_som.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: som
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Somali statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_som_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_swa.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fbe1c4200f9953d603eae6262067facfc09fe694
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_swa.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Swahili statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_tir.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b6055da2859d8932b3d6c40130d895846069f285
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_tir.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Tigrinya statement below? Return only the category. \n\
+  \ntext: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_tir_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_xho.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..110fc08778130c4880b904a682211df4800a2cd5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_xho.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Xhosa statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_yor.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d31e9b23fdfbdc5bbc85be20457c80c9b90c4a31
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/masakhanews_yor.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories technology, religion, politics, sports, health, entertainment, or business;\
+  \ what is the topic of the Yoruba statement below? Return only the category. \n\n\
+  text: {{headline_text}} \\category:\n\n"
+include: masakhanews
+task: masakhanews_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_3/utils.py b/lm_eval/tasks/afrobench/masakhanews/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews
new file mode 100644
index 0000000000000000000000000000000000000000..a1801f4e00b1d90a885ed9d73a14c2745cd73f01
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews
@@ -0,0 +1,43 @@
+tag:
+- masakhanews_tasks
+- masakhanews_prompt_4
+- afrobench_TC_tasks
+dataset_path: masakhane/masakhanews
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "business"
+    - "entertainment"
+    - "health"
+    - "politics"
+    - "religion"
+    - "sports"
+    - "technology"
+should_decontaminate: true
+doc_to_decontamination_query: headline_text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_amh.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a76305859c1e27a1b00b3be6492a76b207d313da
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or business. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_amh_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_eng.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8567113756fde2e8e98c5f6f2f68073a5d14550b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or business. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_eng_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_fra.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f86635f6d5edf28590264ae45a0f3546d868feb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or business. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_fra_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_hau.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1b7ce562b51486f41ce75c6716eda24d56caf1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or business. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_ibo.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d76a905d62d2e93f09608684592dff02c60f131c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or business. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lin.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e0247529b5c4496cce3f52651f6969e894484bb1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_lin_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lug.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca02c0a5fcbd248c82e945646d932614c8e515e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_lug_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_orm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..781eb4cc977bd0ed74698737c56c9f97190b9623
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_orm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_orm_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_pcm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93ad9f482b3539efd0b4e7ab89b64ab75ce91147
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_pcm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_pcm_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_run.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5d985481f1b4443cadd4c6d1ef12424825e02cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_run.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_run_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_sna.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2676db850ff3b7ec98dfdd2b596e8d3011b30915
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_sna_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_som.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6562da417b3a50c0d712038db88bd4f205c13df8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_som.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_som_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_swa.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3bb9764ad0dc0c69ba98fc85fcb1a51cda37c3b8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_tir.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3dfb1d4e7de9510175386192bcdf7f4524181308
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_tir.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_tir_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_xho.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c1b51c20386a2c1d5196d4da47223603b5637ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_xho_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_yor.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d22d1c7f59e8e2103d605b3e5c9c4dd08811bfb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/masakhanews_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Label the following text as technology, religion, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_4/utils.py b/lm_eval/tasks/afrobench/masakhanews/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews
new file mode 100644
index 0000000000000000000000000000000000000000..8d76af03ab044d68314853c0a0005a05141c1dca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews
@@ -0,0 +1,43 @@
+tag:
+- masakhanews_tasks
+- masakhanews_prompt_5
+- afrobench_TC_tasks
+dataset_path: masakhane/masakhanews
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "business"
+    - "entertainment"
+    - "health"
+    - "politics"
+    - "religion"
+    - "sports"
+    - "technology"
+should_decontaminate: true
+doc_to_decontamination_query: headline_text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_amh.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..759ce913fe968c78eed1f302719b61dd0d62aa2a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_amh.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: amh
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Amharic text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_amh_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_eng.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c03032b48ecf521e9563277d2b149c703171321
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_eng.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: eng
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ English text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_eng_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_fra.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..603d149d733336355b4874a5bbffe61786a9edd7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_fra.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: fra
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ French text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_fra_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_hau.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04a478cf6a5b63269c1ef2ef061d50fd08f95c11
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_hau.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Hausa text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_ibo.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce3cc15b942e3cdbf02fa8b885aa2b796b409544
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_ibo.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Igbo text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lin.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e22303fe79bca34cef9784b7ecea4fe1d1a39ab7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lin.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lin
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Lingala text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_lin_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lug.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe949b6f3c7c60a48d72ecf58d47aac7d36cb130
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_lug.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Luganda text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_lug_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_orm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_orm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..413e88125dc1e69cb4aac285ed8f8fc59b887bfe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_orm.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: orm
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Afaan Oromoo text. For each input, classify the topic as technology, business,\
+  \ politics, sports, health, entertainment, or religion. Use the following guidelines:\
+  \ \n\n technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_orm_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_pcm.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9322857eaaa71c77ed2399e23470b8085ebd4f5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_pcm.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Nigerian Pidgin text. For each input, classify the topic as technology, business,\
+  \ politics, sports, health, entertainment, or religion. Use the following guidelines:\
+  \ \n\n technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_pcm_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_run.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f207fb703debacf6655975485fe188b13f4313d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_run.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: run
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kirundi text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_run_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_sna.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..737d335e6df283c7bf9f81c186c6e90f0cd81991
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_sna.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Shona text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_sna_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_som.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..39bb80c47bd7f16e34ec4ebefbdea0a08e6a6bef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_som.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: som
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Somali text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_som_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_swa.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c59e359c21af54c0f9e78950925fb16e2e0e5b29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_swa.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Swahili text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_tir.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..959de7a803f556ecde803744c6dc1451ca4493d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_tir.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tir
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tigrinya text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about religions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_tir_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_xho.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..35cad7295a830661882c30930153700936817082
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_xho.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Xhosa text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about relgions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_xho_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_yor.yaml b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e83c70454d5cd330383f49a7fa3bbaf0f1226790
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/masakhanews_yor.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Yoruba text. For each input, classify the topic as technology, business, politics,\
+  \ sports, health, entertainment, or religion. Use the following guidelines: \n\n\
+  \ technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \nreligion: The text talks about relgions, religious institutions and\
+  \ beliefs or related topics. \n\nbusiness: The text covers economy, business, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{headline_text}} \\category: \n\n"
+include: masakhanews
+task: masakhanews_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhanews/prompt_5/utils.py b/lm_eval/tasks/afrobench/masakhanews/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/masakhanews/utils.py b/lm_eval/tasks/afrobench/masakhanews/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..310a7aeb5af2b998d57c6a793f27b00c8ab04029
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhanews/utils.py
@@ -0,0 +1,127 @@
+import argparse
+import os
+
+import yaml
+
+
+def prompt_func(mode, lang):
+    """Return the MasakhaNEWS prompt template for `mode` ("prompt_1".."prompt_5"); unknown modes raise KeyError.
+
+    `lang` is the language name interpolated into prompts 2, 3 and 5; `{{headline}}` stays a literal placeholder."""
+    # "\\category" is a literal backslash + "category"; the unescaped "\c" spelling is an invalid escape sequence.
+    # NOTE(review): the checked-in prompt_5 yaml files use {{headline_text}}, not {{headline}} - confirm before regenerating.
+    prompt_map = {
+        "prompt_1": "Given the categories technology, business, politics, sports, health, entertainment, or religion; what category does the text: '{{headline}}' belong to: \n\n",
+        "prompt_2": f"Does this {lang} topic; "
+        "'{{headline}}' belong to one of the following categories: technology, business, politics, sports, health, entertainment, or religion? category only\n\n",
+        "prompt_3": "You are an assistant able to classify topics in texts. \n\n"
+        f"Given the categories technology, religion, politics, sports, health, entertainment, or business; what is the topic of the {lang} statement below? Return only the category. "
+        "\n\ntext: {{headline}} \\category:\n\n",
+        "prompt_4": "Label the following text as technology, religion, politics, sports, health, entertainment, or geography. Provide only the category as your response. \n\ntext: {{headline}} \\category: \n\n",
+        "prompt_5": f"You are tasked with performing topic classification on the following {lang} text. "
+        "For each input, classify the topic as technology, business, politics, sports, health, entertainment, or religion. Use the following guidelines: \n\n "
+        "technology: The text discusses scientific discoveries, technological advancements, or related topics. \n"
+        "politics: The text covers political events, policies, or related topics. \n"
+        "sports: The text talks about sports events, athletes, or related topics. \nhealth: The text addresses health issues, medical advancements, or related topics. \n"
+        "entertainment: The text pertains to movies, music, celebrities, or related topics. \n"
+        "religion: The text talks about relgions, religious institutions and beliefs or related topics. \n\n"
+        "business: The text covers economy, business, or related topics. \n\n"
+        "If the text contains multiple topics, choose the dominant topic. For ambiguous or unclear topics, select the category that best reflects the overall content. "
+        "Please provide a single classification for each input.\n\ntext: {{headline}} \\category: \n\n",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a `masakhanews_<lang>.yaml` file for each language under `<output_dir>/<mode>/`.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key handed to `prompt_func`; also the sub-directory and task-name suffix.
+    :raises FileExistsError: After every language was attempted, when `overwrite` is false
+        and at least one target file already existed.
+    """
+    err = []
+    languages = {
+        "amh": "Amharic",
+        "eng": "English",
+        "fra": "French",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "lin": "Lingala",
+        "lug": "Luganda",
+        "orm": "Afaan Oromoo",
+        "pcm": "Nigerian Pidgin",
+        "run": "Kirundi",
+        "sna": "Shona",
+        "som": "Somali",
+        "swa": "Swahili",
+        "tir": "Tigrinya",
+        "xho": "Xhosa",
+        "yor": "Yoruba",
+    }
+
+    # Create the target directory once, outside the try below, so that a failure here is
+    # not mistaken for "yaml file already exists".
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        file_name = f"masakhanews_{lang}.yaml"
+        yaml_details = {
+            "include": "masakhanews",
+            "task": f"masakhanews_{lang}_{mode}",
+            "dataset_name": lang,
+            "doc_to_text": prompt_func(mode, lang_name),
+        }
+        try:
+            # Mode "x" makes open() raise FileExistsError instead of clobbering a file.
+            with open(
+                os.path.join(target_dir, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files for every requested prompt.
+
+    Overwriting stays the default; pass --no-overwrite to fail instead of replacing files.
+    """
+    parser = argparse.ArgumentParser()
+    # --overwrite alone (store_true with default=True) could never yield False, so the
+    # exclusive-create path was unreachable; --no-overwrite flips the same destination.
+    parser.add_argument(
+        "--overwrite", default=True, action="store_true", help="Overwrite files if they already exist"
+    )
+    parser.add_argument(
+        "--no-overwrite", dest="overwrite", action="store_false", help="Fail if a file already exists"
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    PROMPT_CHOICES = ["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"]
+    parser.add_argument(
+        "--mode", nargs="*", default=PROMPT_CHOICES, choices=PROMPT_CHOICES, help="Prompt number(s)"
+    )
+    args = parser.parse_args()
+
+    for mode in args.mode:
+        gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/masakhapos/README.md b/lm_eval/tasks/afrobench/masakhapos/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1fcf11c780e88864fef93b46ef536cc11f33e60b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/README.md
@@ -0,0 +1,75 @@
+# MasakhaPOS
+
+## Paper
+Title: `MasakhaPOS: Part-of-Speech Tagging for Typologically Diverse African languages`
+
+Paper Link: https://aclanthology.org/2023.acl-long.609/
+
+## Abstract
+>In this paper, we present AfricaPOS, the largest part-of-speech (POS) dataset for 20 typologically diverse African languages. We discuss the challenges in annotating POS for these languages using the universal dependencies (UD) guidelines. We conducted extensive POS baseline experiments using both conditional random field and several multilingual pre-trained language models. We applied various cross-lingual transfer models trained with data available in the UD. Evaluating on the AfricaPOS dataset, we show that choosing the best transfer language(s) in both single-source and multi-source setups greatly improves the POS tagging performance of the target languages, in particular when combined with parameter-fine-tuning methods. Crucially, transferring knowledge from a language that matches the language family and morphosyntactic properties seems to be more effective for POS tagging in unseen languages.
+
+HomePage: https://github.com/masakhane-io/masakhane-pos
+
+### Citation
+
+```
+@inproceedings{dione-etal-2023-masakhapos,
+    title = "{M}asakha{POS}: Part-of-Speech Tagging for Typologically Diverse {A}frican languages",
+    author = "Dione, Cheikh M. Bamba  and
+      Adelani, David Ifeoluwa  and
+      Nabende, Peter  and
+      Alabi, Jesujoba  and
+      Sindane, Thapelo  and
+      Buzaaba, Happy  and
+      Muhammad, Shamsuddeen Hassan  and
+      Emezue, Chris Chinenye  and
+      Ogayo, Perez  and
+      Aremu, Anuoluwapo  and
+      Gitau, Catherine  and
+      Mbaye, Derguene  and
+      Mukiibi, Jonathan  and
+      Sibanda, Blessing  and
+      Dossou, Bonaventure F. P.  and
+      Bukula, Andiswa  and
+      Mabuya, Rooweither  and
+      Tapo, Allahsera Auguste  and
+      Munkoh-Buabeng, Edwin  and
+      Memdjokam Koagne, Victoire  and
+      Ouoba Kabore, Fatoumata  and
+      Taylor, Amelia  and
+      Kalipe, Godson  and
+      Macucwa, Tebogo  and
+      Marivate, Vukosi  and
+      Gwadabe, Tajuddeen  and
+      Elvis, Mboning Tchiaze  and
+      Onyenwe, Ikechukwu  and
+      Atindogbe, Gratien  and
+      Adelani, Tolulope  and
+      Akinade, Idris  and
+      Samuel, Olanrewaju  and
+      Nahimana, Marien  and
+      Musabeyezu, Th{\'e}og{\`e}ne  and
+      Niyomutabazi, Emile  and
+      Chimhenga, Ester  and
+      Gotosa, Kudzai  and
+      Mizha, Patrick  and
+      Agbolo, Apelete  and
+      Traore, Seydou  and
+      Uchechukwu, Chinedu  and
+      Yusuf, Aliyu  and
+      Abdullahi, Muhammad  and
+      Klakow, Dietrich",
+    editor = "Rogers, Anna  and
+      Boyd-Graber, Jordan  and
+      Okazaki, Naoaki",
+    booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
+    month = jul,
+    year = "2023",
+    address = "Toronto, Canada",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.acl-long.609/",
+    doi = "10.18653/v1/2023.acl-long.609",
+    pages = "10883--10900",
+    abstract = "In this paper, we present AfricaPOS, the largest part-of-speech (POS) dataset for 20 typologically diverse African languages. We discuss the challenges in annotating POS for these languages using the universal dependencies (UD) guidelines. We conducted extensive POS baseline experiments using both conditional random field and several multilingual pre-trained language models. We applied various cross-lingual transfer models trained with data available in the UD. Evaluating on the AfricaPOS dataset, we show that choosing the best transfer language(s) in both single-source and multi-source setups greatly improves the POS tagging performance of the target languages, in particular when combined with parameter-fine-tuning methods. Crucially, transferring knowledge from a language that matches the language family and morphosyntactic properties seems to be more effective for POS tagging in unseen languages."
+}
+```
diff --git a/lm_eval/tasks/afrobench/masakhapos/gen_utils.py b/lm_eval/tasks/afrobench/masakhapos/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..52b9dafb435cf5f24664d7fb9c8ba73a687a7d4c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/gen_utils.py
@@ -0,0 +1,151 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # NOTE(review): plain value holder; nothing else in this module references it
+    def __init__(self, value):  # keeps `value` unchanged on the instance
+        self.value = value
+
+
+def prompt_func(mode, lang):
+    """Return the MasakhaPOS prompt text registered under `mode`.
+
+    `lang` is the language name spliced into prompt_2 and prompt_3 (the other prompts ignore it);
+    `{{tokens}}` is left verbatim in the result and an unrecognised `mode` raises KeyError.
+    """
+    # Fragments shared between prompts. The wording is reproduced verbatim, quirks included
+    # (e.g. the unbalanced quote after CCONJ), so the generated text does not change.
+    tag_set = (
+        "['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET', 'INTJ', 'NOUN', 'NUM', "
+        "'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']"
+    )
+    output_format = (
+        "The output format should be a list of tuples, where each tuple consists of "
+        "a word from the input text and its corresponding POS tag label"
+    )
+    from_provided_set = " from the POS tag label set provided\n"
+    response_rule = (
+        "Your response should include only a list of tuples, in the order that the words "
+        "appear in the input sentence, including punctuations, with each tuple containing "
+        "the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+    )
+    tag_glossary = (
+        "\nNOUN: Noun (person, place, thing), \nVERB: Verb (action, state), "
+        "\nADJ: Adjective (describes a noun), \nADV: Adverb (modifies a verb, adjective, or adverb), "
+        "\nPRON: Pronoun (replaces a noun), \nDET: Determiner (introduces a noun), "
+        "\nADP: Adposition (preposition or postposition), \nCCONJ: Conjunction (connects words, phrases, clauses)"
+        "\nPUNCT: Punctuation, \nPROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), "
+        "\nSCONJ: Subordinating conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, "
+        "\nNUM: Numeral, \nX: others. "
+    )
+    templates = {
+        "prompt_1": "Please provide the POS tags for each word in the input sentence. The input will be a list of "
+        "words in the sentence. " + output_format + " from the tag label set: " + tag_set + ". \n" + response_rule,
+        "prompt_2": f"You are an expert in tagging words and sentences in {lang} with the right POS tag. \n\nPlease provide "
+        f"the POS tags for each word in the {lang} sentence. The input is a list of words in the sentence. "
+        "POS tag label set: " + tag_set + ". " + output_format + from_provided_set + response_rule,
+        "prompt_3": f"Acting as a {lang} linguist and without making any corrections or changes to the text, perform a part of "
+        "speech (POS) analysis of the sentences using the following POS tag label annotation " + tag_set
+        + ". The input will be a list of words in the sentence. " + output_format + from_provided_set + response_rule,
+        "prompt_4": "Annotate each word in the provided sentence with the appropriate POS tag. The annotation list is given as: "
+        + tag_set + ". The input sentence will be a list of words in the sentence. "
+        + output_format + from_provided_set + response_rule,
+        "prompt_5": "Given the following sentence, identify the part of speech (POS) for each word. Use the following "
+        "POS tag set: " + tag_glossary + output_format + " key only from the POS tag set provided\n" + response_rule,
+    }
+    return templates[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a `masakhapos_<lang>.yaml` file for each language under `<output_dir>/<mode>/`.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key handed to `prompt_func`; also the sub-directory and task-name suffix.
+    :raises FileExistsError: After every language was attempted, if `overwrite` is false and a file existed.
+    """
+    err = []
+    languages = {
+        "bam": "Bambara",
+        "bbj": "Ghomala",
+        "ewe": "Ewe",
+        "fon": "Fon",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kin": "Kinyarwanda",
+        "lug": "Luganda",
+        "luo": "Dholuo",
+        "mos": "Mossi",
+        "nya": "Chichewa",
+        "pcm": "Nigerian Pidgin",
+        "sna": "chiShona",
+        "swa": "Kiswahili",
+        "tsn": "Setswana",
+        "twi": "Twi",
+        "wol": "Wolof",
+        "xho": "isiXhosa",
+        "yor": "Yoruba",
+        "zul": "isiZulu",
+    }
+
+    # Created once, outside the try below, so a failure here is not reported as "yaml already exists".
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        file_name = f"masakhapos_{lang}.yaml"
+        yaml_details = {
+            "include": "masakhapos_yaml",
+            "task": f"masakhapos_{lang}_{mode}",
+            "dataset_name": lang,
+            "doc_to_text": prompt_func(mode, lang_name),
+        }
+        try:
+            # Mode "x" makes open() raise FileExistsError instead of clobbering a file.
+            with open(
+                os.path.join(target_dir, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files for the selected prompt.
+
+    Overwriting stays the default; pass --no-overwrite to fail instead of replacing files.
+    """
+    parser = argparse.ArgumentParser()
+    # store_true with default=True can never yield False; --no-overwrite makes the "x" path reachable.
+    parser.add_argument(
+        "--overwrite", default=True, action="store_true", help="Overwrite files if they already exist"
+    )
+    parser.add_argument(
+        "--no-overwrite", dest="overwrite", action="store_false", help="Fail if a file already exists"
+    )
+    parser.add_argument("--output-dir", default="./", help="Directory to write yaml files to")
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/masakhapos/masakhapos.yaml b/lm_eval/tasks/afrobench/masakhapos/masakhapos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3fb1574eb32a0203198a4d210c788765cf476f34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/masakhapos.yaml
@@ -0,0 +1,13 @@
+group: masakhapos
+task:
+  - masakhapos_prompt_1
+  - masakhapos_prompt_2
+  - masakhapos_prompt_3
+  - masakhapos_prompt_4
+  - masakhapos_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bam.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1c64e387ae638c83e30b1172f458c3976d20728
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bam.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bam
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bam_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bbj.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..418c8e0ca6c411620056f280d51696e730107c2c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_bbj.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bbj_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ewe.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1eeb249744fc6f75bb7a08896fa0caaacdc1e84d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ewe_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_fon.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da7eb7aee4ad2a0712cd49cf96546f69e26d8dc8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_fon.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_fon_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_hau.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..431ed8f1656568111d4206a7a33c954b51cfa743
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ibo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cb171fe3c93c8be6d6bee9b41e6596d769b5deb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_kin.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dced04f22e7424c3d0c4f3a39f4cf58c331f759b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_lug.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e773f6430c0f38d842c63ff8752a78d8a54dd87d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_luo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4544e2b1bce03c8f8fc8d0e82c1c6fbeab6f3570
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_luo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_luo_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_mos.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0c7d3f6a3cd272812926812588744bd737dbb51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_mos.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_mos_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_nya.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8d4fcbf23feecfaa7ef927dc0a2d9c090370469
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_nya.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: nya
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_nya_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_pcm.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d05924ee2ba0c702fbb17de84cce0ed03e536bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_pcm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_sna.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7afa02f4f8b72801d5d782165a68694ef41cdc5a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_swa.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab2f123e1a42759c4f600bb90c4f8450cbc84edf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_tsn.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca02f064a837e69871250046a44d5ed63253ec1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_tsn.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tsn
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_tsn_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_twi.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f22c093639eefc5747c288506d2cb28f90cd6ca6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_wol.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e0bdd23a8a2203243fa657388b7eae8a2be1a28b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_xho.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f712a874546298594bff74f47a85aa67bc5ae23b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdca7a85d905f3e177b496b139ed9705f1a3e620
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yaml
@@ -0,0 +1,32 @@
+tag:
+- masakhapos_tasks
+- masakhapos_prompt_1
+dataset_path: masakhane/masakhapos
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: !function utils.doc_to_target
+should_decontaminate: true
+doc_to_decontamination_query: "Sentence: {{tokens}}\nOutput:"
+filter_list:
+  - filter:
+    - function: regex_pos
+    name: flexible-extract
+metric_list:
+  - metric: acc
+    aggregation: !function utils.acc_score
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yor.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..efa8750a6200a2be388806f4f8da57f52f781b3c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_zul.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..362c9934b856664dc1ca336d8420b170c5532813
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/masakhapos_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Please provide the POS tags for each word in the input sentence. The\
+  \ input will be a list of words in the sentence. The output format should be a list\
+  \ of tuples, where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the tag label set: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. \nYour response should include only a list of tuples, in the order that\
+  \ the words appear in the input sentence, including punctuations, with each tuple\
+  \ containing the corresponding POS tag label for a word. \n\nSentence: {{tokens}}\
+  \ \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_1/utils.py b/lm_eval/tasks/afrobench/masakhapos/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc66d9cce30c1459494f0d5c21a71d1d3f58d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_1/utils.py
@@ -0,0 +1,55 @@
+from itertools import chain
+
+from sklearn.metrics import accuracy_score
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Translate the integer ids in ``doc["upos"]`` into POS tag names.
+
+    Returns one label per id, in the same order; an id that is not in the
+    table raises ``KeyError``.
+    """
+    # Position in this tuple is the integer id that selects the label.
+    ordered_labels = (
+        "NOUN", "PUNCT", "ADP",
+        "NUM", "SYM", "SCONJ",
+        "ADJ", "PART", "DET",
+        "CCONJ", "PROPN", "PRON",
+        "X", "_", "ADV",
+        "INTJ", "VERB", "AUX",
+    )
+    # Keep a dict (not tuple indexing) so unknown ids still fail with KeyError.
+    labels_by_id = dict(enumerate(ordered_labels))
+
+    gold_tags = []
+    for tag_id in doc["upos"]:
+        gold_tags.append(labels_by_id[tag_id])
+    return gold_tags
+
+
+def acc_score(items):
+    """Return the mean per-sentence tag accuracy over ``(gold, pred)`` items.
+
+    Each pred is flattened one level, then gold and pred are cut to the shorter
+    of the two lengths before scoring, so tags past that point are ignored.
+    Returns 0 when ``items`` is empty.
+    """
+    unzipped_list = list(zip(*items))
+    if not unzipped_list:
+        # zip(*items) is empty for no items; indexing it would raise IndexError.
+        return 0
+
+    golds, preds = unzipped_list[0], unzipped_list[1]
+    flattened_preds = [list(chain.from_iterable(p)) for p in preds]
+
+    accuracy_scores = []
+    for gold, pred in zip(golds, flattened_preds):
+        # Compare only the overlapping prefix of the two tag sequences.
+        min_length = min(len(gold), len(pred))
+        accuracy = accuracy_score(gold[:min_length], pred[:min_length])
+        accuracy_scores.append(accuracy)
+
+    # golds is non-empty past the guard above, so the divisor is never zero.
+    return sum(accuracy_scores) / len(accuracy_scores)
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bam.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bde25d7e5c36fa84add36210bf728999f9dafcb2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bam.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: bam
+doc_to_text: "You are an expert in tagging words and sentences in Bambara with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Bambara sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bam_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bbj.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8439e6b03f209094e973cf0f9faddfd1a32495b0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_bbj.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "You are an expert in tagging words and sentences in Ghomala with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Ghomala sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bbj_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ewe.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ffa2ba95963fbe4cac38e5a419df3e98b140750
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ewe.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "You are an expert in tagging words and sentences in Ewe with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Ewe sentence. The\
+  \ input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP', 'ADV',\
+  \ 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ewe_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_fon.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..548f2de48255080669b96408c1975eff7958770b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_fon.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "You are an expert in tagging words and sentences in Fon with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Fon sentence. The\
+  \ input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP', 'ADV',\
+  \ 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_fon_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_hau.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4bc034571803b9fee3f6af8db6e567d64f2a2e61
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_hau.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "You are an expert in tagging words and sentences in Hausa with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Hausa sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ibo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0f5d357eabfeab7ccd993634be3f2baedfeab84
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_ibo.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "You are an expert in tagging words and sentences in Igbo with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Igbo sentence. The\
+  \ input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP', 'ADV',\
+  \ 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_kin.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95fd232a615dffbd964e0225bd01505bbbd2c396
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_kin.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "You are an expert in tagging words and sentences in Kinyarwanda with\
+  \ the right POS tag. \n\nPlease provide the POS tags for each word in the Kinyarwanda\
+  \ sentence. The input is a list of words in the sentence. POS tag label set: ['ADJ',\
+  \ 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN',\
+  \ 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples,\
+  \ where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the POS tag label set provided. \nYour response should include\
+  \ only a list of tuples, in the order that the words appear in the input sentence,\
+  \ including punctuations, with each tuple containing the corresponding POS tag label\
+  \ for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_lug.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..21b02b10864503d1437208dc0a56f4ad6bb4e9d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_lug.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "You are an expert in tagging words and sentences in Luganda with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Luganda sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_luo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..42ccb34fec23488a68562f06ebe2e05811f4e057
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_luo.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "You are an expert in tagging words and sentences in Dholuo with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Dholuo sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_luo_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_mos.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cfa74aefef204c134d692d17913371137a696a1b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_mos.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "You are an expert in tagging words and sentences in Mossi with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Mossi sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_mos_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_nya.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..27de8386357d493a950920afb895edd9eb689adf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_nya.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: nya
+doc_to_text: "You are an expert in tagging words and sentences in Chichewa with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Chichewa sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_nya_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_pcm.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c532569d338696c50b8746c4b1ac9ded2b20d22
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_pcm.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are an expert in tagging words and sentences in Nigerian Pidgin\
+  \ with the right POS tag. \n\nPlease provide the POS tags for each word in the Nigerian\
+  \ Pidgin sentence. The input is a list of words in the sentence. POS tag label set:\
+  \ ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON',\
+  \ 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a\
+  \ list of tuples, where each tuple consists of a word from the input text and its\
+  \ corresponding POS tag label from the POS tag label set provided. \nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_sna.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6c6467d81bfd873ed361f1bccab89710ccfd370
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_sna.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "You are an expert in tagging words and sentences in chiShona with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the chiShona sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided. \nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_swa.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1ca8780834ded2c13edc50203f610c1b8147693
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_swa.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "You are an expert in tagging words and sentences in Kiswahili with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Kiswahili\
+  \ sentence. The input is a list of words in the sentence. POS tag label set: ['ADJ',\
+  \ 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN',\
+  \ 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples,\
+  \ where each tuple consists of a word from the input text and its corresponding\
+  \ POS tag label from the POS tag label set provided\nYour response should include\
+  \ only a list of tuples, in the order that the words appear in the input sentence,\
+  \ including punctuations, with each tuple containing the corresponding POS tag label\
+  \ for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_tsn.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a69886646284706e2b4cb11bab61a572efa726b5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_tsn.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: tsn
+doc_to_text: "You are an expert in tagging words and sentences in Setswana with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Setswana sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_tsn_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_twi.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22a6f414cdbd3485cb822a95f8b2a41012174907
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_twi.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "You are an expert in tagging words and sentences in Twi with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Twi sentence. The\
+  \ input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP', 'ADV',\
+  \ 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_wol.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e64fcc3dadaf548ecc4f936122dc9f042094fa6e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_wol.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "You are an expert in tagging words and sentences in Wolof with the right\
+  \ POS tag. \n\nPlease provide the POS tags for each word in the Wolof sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_xho.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0d8d8deda904adfe211b7a1b138742ba90c57a6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_xho.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "You are an expert in tagging words and sentences in isiXhosa with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the isiXhosa sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..044fffdb895a8c2b05ddd96602dc8879b8579b4f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yaml
@@ -0,0 +1,32 @@
+tag:
+- masakhapos_tasks
+- masakhapos_prompt_2
+dataset_path: masakhane/masakhapos
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: !function utils.doc_to_target
+should_decontaminate: true
+doc_to_decontamination_query: "Sentence: {{tokens}}\nOutput:"
+filter_list:
+  - filter:
+    - function: regex_pos
+    name: flexible-extract
+metric_list:
+  - metric: acc
+    aggregation: !function utils.acc_score
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yor.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a9d1b78326ba004acfd95ba7f1c1682f240cb6e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_yor.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "You are an expert in tagging words and sentences in Yoruba with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the Yoruba sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_zul.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1aa1ca4c72ad780deb98fcd2a7d76ba4d6221f1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/masakhapos_zul.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "You are an expert in tagging words and sentences in isiZulu with the\
+  \ right POS tag. \n\nPlease provide the POS tags for each word in the isiZulu sentence.\
+  \ The input is a list of words in the sentence. POS tag label set: ['ADJ', 'ADP',\
+  \ 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT',\
+  \ 'SCONJ', 'SYM', 'VERB', 'X']. The output format should be a list of tuples, where\
+  \ each tuple consists of a word from the input text and its corresponding POS tag\
+  \ label from the POS tag label set provided\nYour response should include only a\
+  \ list of tuples, in the order that the words appear in the input sentence, including\
+  \ punctuations, with each tuple containing the corresponding POS tag label for a\
+  \ word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_2/utils.py b/lm_eval/tasks/afrobench/masakhapos/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc66d9cce30c1459494f0d5c21a71d1d3f58d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_2/utils.py
@@ -0,0 +1,55 @@
+from itertools import chain
+
+from sklearn.metrics import accuracy_score
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Translate the integer ids in ``doc["upos"]`` into POS tag names.
+
+    Returns a list holding one tag name per id, in the same order. An id
+    that is not in the 0-17 table below raises ``KeyError``.
+    """
+    # The position of each name in this tuple is the id that maps to it.
+    names_in_id_order = (
+        "NOUN", "PUNCT", "ADP",
+        "NUM", "SYM", "SCONJ",
+        "ADJ", "PART", "DET",
+        "CCONJ", "PROPN", "PRON",
+        "X", "_", "ADV",
+        "INTJ", "VERB", "AUX",
+    )
+    name_for_id = dict(enumerate(names_in_id_order))
+
+    # Plain indexing (not .get) so that an unknown id fails loudly.
+    target = []
+    for tag_id in doc["upos"]:
+        target.append(name_for_id[tag_id])
+    return target
+
+
+def acc_score(items):
+    """Average the label accuracy of every (gold, predictions) item.
+
+    Element 0 of each item is the gold label sequence and element 1 is an
+    iterable of predicted label sequences, which is flattened into one list.
+    Gold and prediction are scored over their common length only, so labels
+    beyond the shorter of the two are ignored. Returns 0 for empty ``items``.
+    """
+    columns = list(zip(*items))
+    # zip(*items) yields nothing for empty input; indexing an empty column
+    # list would raise IndexError, so the 0 fallback has to be applied here.
+    if not columns:
+        return 0
+    golds, preds = columns[0], columns[1]
+
+    accuracy_scores = []
+    for gold, pred in zip(golds, preds):
+        pred = list(chain.from_iterable(pred))
+        # Truncate both sides to the shorter length before comparing.
+        overlap = min(len(gold), len(pred))
+        accuracy_scores.append(accuracy_score(gold[:overlap], pred[:overlap]))
+
+    # columns is non-empty here, so at least one score was collected.
+    return sum(accuracy_scores) / len(accuracy_scores)
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bam.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64bf664f58c9c3ebf4a5192c9f84909cfd7e97c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bam.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: bam
+doc_to_text: "Acting as a Bambara linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bam_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bbj.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50d00b6dd66e1e7f00205a455c6de3f7cc48bc43
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_bbj.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Acting as a Ghomala linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bbj_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ewe.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c83ad4bad7d7f209c4541c067b8f3254e0869007
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ewe.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Acting as an Ewe linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ewe_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_fon.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b12efe16d71a494b3f71178a64347167ee315a3c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_fon.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "Acting as a Fon linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_fon_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_hau.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..613384cf036ccb0232274c55521c30e27ee039b6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_hau.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Acting as a Hausa linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ibo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d7af7e36e150d1f80abdaee1aea1fb5bf5b093b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_ibo.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Acting as an Igbo linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_kin.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1255d99f002b1aa19209a89db5aefbff5ea69cc5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_kin.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Acting as a Kinyarwanda linguist and without making any corrections\
+  \ or changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_lug.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0eb3fad69db8160e8d43b2803bbe418eda8462b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_lug.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Acting as a Luganda linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_luo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d9ceb84fa771e19e6232a62bfc3b2c092251b55
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_luo.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Acting as a Dholuo linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_luo_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_mos.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..705e4d512e917aa9e532bebf8781f13b89d44017
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_mos.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Acting as a Mossi linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_mos_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_nya.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fecb644d99aa1621c9ac5a6f34bcc87de7f0d377
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_nya.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: nya
+doc_to_text: "Acting as a Chichewa linguist and without making any corrections or\
+  \ changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_nya_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_pcm.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9cfc76c52afc0407559e5c4141d57d586a814676
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_pcm.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Acting as a Nigerian Pidgin linguist and without making any corrections\
+  \ or changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_pcm_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_sna.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..947b68fe075c2a24000e0448df429bd12f69f159
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_sna.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Acting as a chiShona linguist and without making any corrections or\
+  \ changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_swa.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0cc2e6ef31096505c422692b1262d675580de849
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_swa.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Acting as a Kiswahili linguist and without making any corrections or\
+  \ changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_tsn.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a37aa2e611c87e94ca1e4444b7e583244c4598b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_tsn.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: tsn
+doc_to_text: "Acting as a Setswana linguist and without making any corrections or\
+  \ changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ',\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_tsn_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_twi.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40bf3c1700a025cfe56a1394d3f4c9dfa4f741be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_twi.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Acting as a Twi linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_twi_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_wol.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97e98aa71dc4a13e913a13717c9749c218eabb3f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_wol.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Acting as a Wolof linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_xho.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..72dafcfabbc51210bcc1678c27d88a656cd97416
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_xho.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Acting as a isiXhosa linguist and without making any corrections or\
+  \ changes to the text, perform a part of speech (POS) analysis of the sentences\
+  \ using the following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ,\
+  \ 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM',\
+  \ 'VERB', 'X']. The input will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..681b621601ed000230f869f1b8dfcd9a3c5db32a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yaml
@@ -0,0 +1,32 @@
+tag:
+- masakhapos_tasks
+- masakhapos_prompt_3
+dataset_path: masakhane/masakhapos
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: !function utils.doc_to_target
+should_decontaminate: true
+doc_to_decontamination_query: "Sentence: {{tokens}}\nOutput:"
+filter_list:
+  - filter:
+    - function: regex_pos
+    name: flexible-extract
+metric_list:
+  - metric: acc
+    aggregation: !function utils.acc_score
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yor.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c11f48aa60f481bb966bbdb2ddba3da5d4c976f6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_yor.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Acting as a Yoruba linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_zul.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d89dcf412e4fb99f1f3d788cbdacdb08fe516806
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/masakhapos_zul.yaml
@@ -0,0 +1,14 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Acting as a isiZulu linguist and without making any corrections or changes\
+  \ to the text, perform a part of speech (POS) analysis of the sentences using the\
+  \ following POS tag label annotation ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input will be a list of words in the sentence. The output format should\
+  \ be a list of tuples, where each tuple consists of a word from the input text and\
+  \ its corresponding POS tag label from the POS tag label set provided\nYour response\
+  \ should include only a list of tuples, in the order that the words appear in the\
+  \ input sentence, including punctuations, with each tuple containing the corresponding\
+  \ POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_3/utils.py b/lm_eval/tasks/afrobench/masakhapos/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc66d9cce30c1459494f0d5c21a71d1d3f58d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_3/utils.py
@@ -0,0 +1,55 @@
+from itertools import chain
+
+from sklearn.metrics import accuracy_score
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    pos_tag_map = {
+        0: "NOUN",
+        1: "PUNCT",
+        2: "ADP",
+        3: "NUM",
+        4: "SYM",
+        5: "SCONJ",
+        6: "ADJ",
+        7: "PART",
+        8: "DET",
+        9: "CCONJ",
+        10: "PROPN",
+        11: "PRON",
+        12: "X",
+        13: "_",
+        14: "ADV",
+        15: "INTJ",
+        16: "VERB",
+        17: "AUX",
+    }
+    return [pos_tag_map[tag] for tag in doc["upos"]]
+
+
+def acc_score(items):
+    unzipped_list = list(zip(*items))
+
+    golds, preds = unzipped_list[0], unzipped_list[1]
+
+    # Flatten preds' inner lists
+    flattened_preds = [list(chain.from_iterable(p)) for p in preds]
+
+    # Calculate the accuracy for each gold-pred pair
+    accuracy_scores = []
+    for gold, pred in zip(golds, flattened_preds):
+        # Ensure both lists are of the same length, otherwise truncate to match
+        min_length = min(len(gold), len(pred))
+        gold = gold[:min_length]
+        pred = pred[:min_length]
+
+        # Calculate accuracy for the current pair and add to the list
+        accuracy = accuracy_score(gold, pred)
+        accuracy_scores.append(accuracy)
+
+    mean_accuracy = (
+        sum(accuracy_scores) / len(accuracy_scores) if accuracy_scores else 0
+    )
+    return mean_accuracy
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bam.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..318a15074ff7a2624a388347a9b8304032631632
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bam.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bam
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bam_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bbj.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24680e2dbfb841086a49469a56b25d32e8efa1ef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_bbj.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bbj_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ewe.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..748232217a473bbf3e977a8d63722c59bbbfc405
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ewe.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ewe_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_fon.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2deca67ef9ffbe8af1afdbb783dc130bff2d8c49
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_fon.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_fon_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_hau.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a1f5b77a23e3452a8e865234fe49216cc44984e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_hau.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ibo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..789b0897fe29df8f65c6ee73e5620ef247352da7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_ibo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_kin.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1486b4fa916864eac76fa5908399696e783fa108
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_kin.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_lug.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a80c56029aae6ebc263957323d55c9186c1f503a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_lug.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_lug_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_luo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3136f885164f970a7ce5cc3da802fb6cbb1f51e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_luo.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_luo_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_mos.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24ae470cacd0ece662ebe5109f3d67d8669741fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_mos.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_mos_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_nya.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..616c003d477322972eb955fc479ed333bf96001b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_nya.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: nya
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_nya_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_pcm.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dcaae1189f0aeb79f965e37e6f59d8f52a7f1416
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_pcm.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_pcm_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_sna.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07237cee90d2275e0d400695efc13ef077a6fbc3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_sna.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_sna_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_swa.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c937299bf5f7db6bd864be8c718744802f32a834
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_swa.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_tsn.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1bc5ad546a49e699732aab50dde24130a6b9a81
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_tsn.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: tsn
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_tsn_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_twi.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf3a523b9319a84f14f675ab26f88027d4f40315
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_twi.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_twi_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_wol.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d427cee3cdb444f9f5c75c06f27209baca9459fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_wol.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_wol_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_xho.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b6525f98b3ae37542ce700b83caaa072e3f6f3f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_xho.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ, 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_xho_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba62938696ba16d383965dbdca203f048b5e0738
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yaml
@@ -0,0 +1,32 @@
+tag:
+- masakhapos_tasks
+- masakhapos_prompt_4
+dataset_path: masakhane/masakhapos
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: !function utils.doc_to_target
+should_decontaminate: true
+doc_to_decontamination_query: "Sentence: {{tokens}}\nOutput:"
+filter_list:
+  - filter:
+    - function: regex_pos
+    name: flexible-extract
+metric_list:
+  - metric: acc
+    aggregation: !function utils.acc_score
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yor.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a7d70f674ad0fdd5ddd5a11ae7df41a4b428b738
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_yor.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_zul.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a03cc5d5dc809cab290adbb90d1ef4188d861f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/masakhapos_zul.yaml
@@ -0,0 +1,13 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Annotate each word in the provided sentence with the appropriate POS\
+  \ tag. The annotation list is given as: ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET',\
+  \ 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB',\
+  \ 'X']. The input sentence will be a list of words in the sentence. The output format\
+  \ should be a list of tuples, where each tuple consists of a word from the input\
+  \ text and its corresponding POS tag label from the POS tag label set provided\n\
+  Your response should include only a list of tuples, in the order that the words\
+  \ appear in the input sentence, including punctuations, with each tuple containing\
+  \ the corresponding POS tag label for a word. \n\nSentence: {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_zul_prompt_4
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_4/utils.py b/lm_eval/tasks/afrobench/masakhapos/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc66d9cce30c1459494f0d5c21a71d1d3f58d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_4/utils.py
@@ -0,0 +1,55 @@
+from itertools import chain
+
+from sklearn.metrics import accuracy_score
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Map the integer ids in ``doc["upos"]`` to their POS tag labels.
+
+    Args:
+        doc: Mapping with an ``upos`` entry holding an iterable of tag ids.
+
+    Returns:
+        A list with one label string per id, in the same order.
+
+    Raises:
+        KeyError: If an id is not one of the 18 ids in the table below.
+    """
+    # The position of each label in the tuple is its integer tag id.
+    # fmt: off
+    ordered_labels = (
+        "NOUN", "PUNCT", "ADP", "NUM", "SYM", "SCONJ",
+        "ADJ", "PART", "DET", "CCONJ", "PROPN", "PRON",
+        "X", "_", "ADV", "INTJ", "VERB", "AUX",
+    )
+    # fmt: on
+    id_to_label = dict(enumerate(ordered_labels))
+    return [id_to_label[tag_id] for tag_id in doc["upos"]]
+
+
+def acc_score(items):
+    """Return the mean per-example accuracy over ``(gold, pred)`` items.
+
+    Each ``pred`` is flattened one level, gold and pred are truncated to the
+    shorter of their lengths, and ``accuracy_score`` is computed per pair;
+    the per-pair scores are then averaged with equal weight.
+
+    Returns ``0`` when ``items`` is empty.
+    """
+    columns = list(zip(*items))
+    # Guard before indexing: an empty unzip has no gold/pred columns, which
+    # used to raise IndexError before the empty-input fallback was reached.
+    if not columns:
+        return 0
+
+    scores = []
+    for gold, nested_pred in zip(columns[0], columns[1]):
+        # NOTE(review): the one-level flatten assumes each pred is a list of
+        # lists; a flat list of tag strings would be split into characters.
+        pred = list(chain.from_iterable(nested_pred))
+        shared_len = min(len(gold), len(pred))
+        scores.append(accuracy_score(gold[:shared_len], pred[:shared_len]))
+
+    return sum(scores) / len(scores)
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bam.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cd65c90efa0d394c0e613e62dee4c6d95dce124
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bam.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: bam
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bam_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bbj.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bbj.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..969406dcbd1b4863244ba19446bf846eda017e8f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_bbj.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: bbj
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_bbj_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ewe.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aacc83ee0f47aec3f6dd93fadacdc12177a24cfd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ewe.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ewe
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ewe_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_fon.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..642d1d0acd90761c2cbb04d3987dafa43e9ab1f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_fon.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: fon
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_fon_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_hau.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2c07ce71d205c9a4236fbd2777ef44d624683e5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_hau.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ibo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bef4b9941243e2f41332bb2410bd42a815e497bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_ibo.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_kin.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1983540b6a1d4dc21930d883d81fd53e778ca6a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_kin.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kin
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_lug.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55b9210a54621ee792db781b085a208f8384b0ba
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_lug.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lug
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_lug_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_luo.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a17e407c3f20cd80bae0b9673455fc242cfa19c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_luo.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: luo
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_luo_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_mos.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43479749d5848f86898081e8ba751942f44b74e2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_mos.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: mos
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_mos_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_nya.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d2d0ec114db2080efcfbc76c1d63511d2a9ae07
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_nya.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: nya
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_nya_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_pcm.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cd5ea9278b841721283b09b5920f8d395674b81f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_pcm.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_pcm_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_sna.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3cc21f0cf87014b3eea6e0e6dcddbc38450066fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_sna.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: sna
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_sna_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_swa.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b08dacdef6912bf10bc3136726f28229eeb43d30
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_swa.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: swa
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_tsn.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_tsn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbc20d6ea0ab4e613dc077eac01227c1d8ca198a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_tsn.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tsn
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_tsn_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_twi.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..11af3b877758759dc4d4eb34fbf8f99421d54f7b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_twi.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: twi
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_twi_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_wol.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca294724bced107ce05490ba52be94f9d73b5f74
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_wol.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: wol
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_wol_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_xho.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..345354c3c3bf7efdca51ee328c23b29f26dd5daa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_xho.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: xho
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_xho_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df148e8a8ab567d65dc12a36f60a0b3f753b8c86
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yaml
@@ -0,0 +1,32 @@
+tag:
+- masakhapos_tasks
+- masakhapos_prompt_5
+dataset_path: masakhane/masakhapos
+dataset_name: null
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+generation_kwargs:
+  do_sample: false
+  until:
+  - </s>
+  - <|im_end|>
+validation_split: validation
+test_split: test
+fewshot_split: train
+doc_to_target: !function utils.doc_to_target
+should_decontaminate: true
+doc_to_decontamination_query: "Sentence: {{tokens}}\nOutput:"
+filter_list:
+  - filter:
+    - function: regex_pos
+    name: flexible-extract
+metric_list:
+  - metric: acc
+    aggregation: !function utils.acc_score
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yor.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84bb266af31906017d066df113e7ca999579f744
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_yor.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_zul.yaml b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e400bfe74d1505f9335dcd6baf3ff21c949b8b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/masakhapos_zul.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: zul
+doc_to_text: "Given the following sentence, identify the part of speech (POS) for\
+  \ each word. Use the following POS tag set: \nNOUN: Noun (person, place, thing),\
+  \ \nVERB: Verb (action, state), \nADJ: Adjective (describes a noun), \nADV: Adverb\
+  \ (modifies a verb, adjective, or adverb), \nPRON: Pronoun (replaces a noun), \n\
+  DET: Determiner (introduces a noun), \nADP: Adposition (preposition or postposition),\
+  \ \nCCONJ: Conjunction (connects words, phrases, clauses)\nPUNCT: Punctuation, \n\
+  PROPN: Proper Noun, \nAUX: Auxiliary verb (helper verb), \nSCONJ: Subordinating\
+  \ conjunction \nPART: Particle, \nSYM: Symbol, \nINTJ: Interjection, \nNUM: Numeral,\
+  \ \nX: others. The output format should be a list of tuples, where each tuple consists\
+  \ of a word from the input text and its corresponding POS tag label key only from\
+  \ the POS tag set provided\nYour response should include only a list of tuples,\
+  \ in the order that the words appear in the input sentence, including punctuations,\
+  \ with each tuple containing the corresponding POS tag label for a word. \n\nSentence:\
+  \ {{tokens}} \nOutput: "
+include: masakhapos_yaml
+task: masakhapos_zul_prompt_5
diff --git a/lm_eval/tasks/afrobench/masakhapos/prompt_5/utils.py b/lm_eval/tasks/afrobench/masakhapos/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc66d9cce30c1459494f0d5c21a71d1d3f58d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/prompt_5/utils.py
@@ -0,0 +1,55 @@
+from itertools import chain
+
+from sklearn.metrics import accuracy_score
+
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_target(doc):
+    """Translate the integer ids in ``doc["upos"]`` into POS tag names.
+
+    Returns a list holding one tag string per id, in the original order.
+    An id without an entry in the table (anything but 0-17) raises KeyError.
+    """
+    # Names are written in id order (three per row), so enumerate() rebuilds
+    # the same id -> name mapping as an explicit {0: "NOUN", 1: "PUNCT", ...}.
+    tag_names = [
+        "NOUN", "PUNCT", "ADP",  # ids 0-2
+        "NUM", "SYM", "SCONJ",  # ids 3-5
+        "ADJ", "PART", "DET",  # ids 6-8
+        "CCONJ", "PROPN", "PRON",  # ids 9-11
+        "X", "_", "ADV",  # ids 12-14
+        "INTJ", "VERB", "AUX",  # ids 15-17
+    ]
+    id_to_tag = dict(enumerate(tag_names))
+
+    targets = []
+    for tag_id in doc["upos"]:
+        targets.append(id_to_tag[tag_id])
+    return targets
+
+
+def acc_score(items):
+    """Return the mean per-sentence tagging accuracy over ``items``.
+
+    Each item is a ``(gold, pred)`` pair. ``gold`` is a sequence of tags;
+    ``pred`` is an iterable of tag sequences that is flattened into one list.
+    Both sides are cut to the shorter length before they are compared, and
+    the per-pair accuracies are averaged with equal weight.
+    Returns 0 when ``items`` is empty.
+    """
+    columns = list(zip(*items))
+    if not columns:
+        # Nothing to score: indexing ``columns`` below would raise IndexError.
+        return 0
+    golds, preds = columns[0], columns[1]
+
+    scores = []
+    for gold, nested_pred in zip(golds, preds):
+        pred = list(chain.from_iterable(nested_pred))
+        # Score only the overlapping prefix of the two tag sequences; tags
+        # past the shorter length are ignored rather than counted as errors.
+        overlap = min(len(gold), len(pred))
+        scores.append(accuracy_score(gold[:overlap], pred[:overlap]))
+
+    return sum(scores) / len(scores)
diff --git a/lm_eval/tasks/afrobench/masakhapos/utils.py b/lm_eval/tasks/afrobench/masakhapos/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7976f846c42a3b8d347553cacc97779dea15671
--- /dev/null
+++ b/lm_eval/tasks/afrobench/masakhapos/utils.py
@@ -0,0 +1,40 @@
+from lm_eval.utils import weighted_f1_score
+
+
+def doc_to_text(doc):
+    """Return the POS-tagging prompt with ``doc["tokens"]`` filled in."""
+    template = """Please provide the POS tags for each word in the input sentence. The input will be a list of words in
+    the sentence. The output format should be a list of tuples, where each tuple consists of a word from the input text
+    and its corresponding POS tag label from the tag label set: ["ADJ", "ADP", "ADV", "AUX", "CCONJ, "DET", "INTJ",
+    "NOUN", "NUM", "PART", "PRON", "PROPN", "PUNCT" "SCONJ", "SYM", "VERB", "X"]. \nYour response should include only a
+    list of tuples, in the order that the words appear in the input sentence, with each tuple containing the
+    corresponding POS tag label for a word.
+
+    Input: {tokens}
+    Output: """
+
+    return template.format(tokens=doc["tokens"])
+
+
+def doc_to_target(doc):
+    """Translate the integer ids in ``doc["upos"]`` into POS tag names.
+
+    Returns a list holding one tag string per id, in the original order.
+    An id without an entry in the table (anything but 0-17) raises KeyError.
+    """
+    # Names are written in id order (three per row), so enumerate() rebuilds
+    # the same id -> name mapping as an explicit {0: "NOUN", 1: "PUNCT", ...}.
+    tag_names = [
+        "NOUN", "PUNCT", "ADP",  # ids 0-2
+        "NUM", "SYM", "SCONJ",  # ids 3-5
+        "ADJ", "PART", "DET",  # ids 6-8
+        "CCONJ", "PROPN", "PRON",  # ids 9-11
+        "X", "_", "ADV",  # ids 12-14
+        "INTJ", "VERB", "AUX",  # ids 15-17
+    ]
+    id_to_tag = dict(enumerate(tag_names))
+
+    targets = []
+    for tag_id in doc["upos"]:
+        targets.append(id_to_tag[tag_id])
+    return targets
diff --git a/lm_eval/tasks/afrobench/naijarc/README.md b/lm_eval/tasks/afrobench/naijarc/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f6f98178b8ee2a0f60e818a93d520fb67d748bce
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/README.md
@@ -0,0 +1,25 @@
+# NaijaRC
+
+## Paper
+Title: `NaijaRC: A Multi-choice Reading Comprehension Dataset for Nigerian Languages`
+
+Paper Link: https://arxiv.org/abs/2308.09768
+
+## Abstract
+>In this paper, we create NaijaRC: a new multi-choice Reading Comprehension dataset for three native Nigeria languages that is based on high-school reading comprehension examination. We provide baseline results by performing cross-lingual transfer using existing English RACE and Belebele training dataset based on a pre-trained encoder-only model. Additionally, we provide results by prompting large language models (LLMs) like GPT-4.
+
+HomePage: https://huggingface.co/datasets/aremuadeolajr/NaijaRC
+
+### Citation
+
+```
+@misc{aremu2024naijarcmultichoicereadingcomprehension,
+      title={NaijaRC: A Multi-choice Reading Comprehension Dataset for Nigerian Languages},
+      author={Anuoluwapo Aremu and Jesujoba O. Alabi and Daud Abolade and Nkechinyere F. Aguobi and Shamsuddeen Hassan Muhammad and David Ifeoluwa Adelani},
+      year={2024},
+      eprint={2308.09768},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2308.09768},
+}
+```
diff --git a/lm_eval/tasks/afrobench/naijarc/naijarc.yaml b/lm_eval/tasks/afrobench/naijarc/naijarc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4230ed64941418151913be985ebd809060ebe6a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/naijarc.yaml
@@ -0,0 +1,13 @@
+group: naijarc
+task:
+  - naijarc_prompt_1
+  - naijarc_prompt_2
+  - naijarc_prompt_3
+  - naijarc_prompt_4
+  - naijarc_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc
new file mode 100644
index 0000000000000000000000000000000000000000..b077e3bb5c92cd6aaade7621b93511bf2851ab72
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc
@@ -0,0 +1,24 @@
+tag:
+    - naijarc_tasks
+    - naijarc_prompt_1
+    - RC_tasks
+dataset_path: Davlan/NaijaRC
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_hau.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1144a9a2d58eab36de778b1939c6b925e671210d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'P: {{story}}
+
+  Q: {{question.strip()}}
+
+  A: {{options_A}}
+
+  B: {{options_B}}
+
+  C: {{options_C}}
+
+  D: {{options_D}}
+
+  Please choose the correct answer from the options above:'
+include: naijarc
+task: naijarc_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_ibo.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1db685234f5dc17f4cf6ac355a802d4d9329d191
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'P: {{story}}
+
+  Q: {{question.strip()}}
+
+  A: {{options_A}}
+
+  B: {{options_B}}
+
+  C: {{options_C}}
+
+  D: {{options_D}}
+
+  Please choose the correct answer from the options above:'
+include: naijarc
+task: naijarc_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_yor.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2bb83fea0ad9cb686266f87d064a1f4902984288
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_1/naijarc_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'P: {{story}}
+
+  Q: {{question.strip()}}
+
+  A: {{options_A}}
+
+  B: {{options_B}}
+
+  C: {{options_C}}
+
+  D: {{options_D}}
+
+  Please choose the correct answer from the options above:'
+include: naijarc
+task: naijarc_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc
new file mode 100644
index 0000000000000000000000000000000000000000..3a8ec09a94a68295544a7afc613b34f96f4f7082
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc
@@ -0,0 +1,23 @@
+tag:
+    - naijarc_tasks
+    - naijarc_prompt_2
+    - RC_tasks
+dataset_path: Davlan/NaijaRC
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_hau.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a1d94db4025c21351b28a9a538efb77cb18aaadf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Passage: {{story}}
+
+  Question: {{question.strip()}}
+
+  1: {{options_A}}
+
+  2: {{options_B}}
+
+  3: {{options_C}}
+
+  4: {{options_D}}
+
+  Please select the correct answer from the given choices:'
+include: naijarc
+task: naijarc_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_ibo.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8384fad18343389dd8a22a1b7d2ae21e1de0e22e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Passage: {{story}}
+
+  Question: {{question.strip()}}
+
+  1: {{options_A}}
+
+  2: {{options_B}}
+
+  3: {{options_C}}
+
+  4: {{options_D}}
+
+  Please select the correct answer from the given choices:'
+include: naijarc
+task: naijarc_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_yor.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..88b1c198185945ce82a619f7b06b5777d27083aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_2/naijarc_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Passage: {{story}}
+
+  Question: {{question.strip()}}
+
+  1: {{options_A}}
+
+  2: {{options_B}}
+
+  3: {{options_C}}
+
+  4: {{options_D}}
+
+  Please select the correct answer from the given choices:'
+include: naijarc
+task: naijarc_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc
new file mode 100644
index 0000000000000000000000000000000000000000..06746a4314ecf5700b09020482eda0698fe2a126
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc
@@ -0,0 +1,23 @@
+tag:
+    - naijarc_tasks
+    - naijarc_prompt_3
+    - RC_tasks
+dataset_path: Davlan/NaijaRC
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_hau.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb4b443124950e9ba6a7df1896111a68a257e7ed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_hau.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Context: {{story}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{options_A}}
+
+  Option B: {{options_B}}
+
+  Option C: {{options_C}}
+
+  Option D: {{options_D}}
+
+  Please indicate the correct option from the list above:'
+include: naijarc
+task: naijarc_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_ibo.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dad37fe953e6056fa58a9dd006d5d79de29002a7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_ibo.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Context: {{story}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{options_A}}
+
+  Option B: {{options_B}}
+
+  Option C: {{options_C}}
+
+  Option D: {{options_D}}
+
+  Please indicate the correct option from the list above:'
+include: naijarc
+task: naijarc_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_yor.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5ab84a8b5dcaf181c72b1db050f53281eeb26600
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_3/naijarc_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Context: {{story}}
+
+  Query: {{question.strip()}}
+
+  Option A: {{options_A}}
+
+  Option B: {{options_B}}
+
+  Option C: {{options_C}}
+
+  Option D: {{options_D}}
+
+  Please indicate the correct option from the list above:'
+include: naijarc
+task: naijarc_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc
new file mode 100644
index 0000000000000000000000000000000000000000..27bbc8c90c54954073b905cb3161bab83a83a203
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc
@@ -0,0 +1,23 @@
+tag:
+    - naijarc_tasks
+    - naijarc_prompt_4
+    - RC_tasks
+dataset_path: Davlan/NaijaRC
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_hau.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f846a8cf42bcb903dbf957218996db34cccf4ea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_hau.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: '{{story}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{options_A}}
+
+  B) {{options_B}}
+
+  C) {{options_C}}
+
+  D) {{options_D}}
+
+  Please provide the correct answer from the choices given:'
+include: naijarc
+task: naijarc_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_ibo.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..926d7a8f1615a83902e98ff65633e0fd19838d8d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_ibo.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: '{{story}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{options_A}}
+
+  B) {{options_B}}
+
+  C) {{options_C}}
+
+  D) {{options_D}}
+
+  Please provide the correct answer from the choices given:'
+include: naijarc
+task: naijarc_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_yor.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13ad793cbdd9544de9cc50c861ef72c04226f32b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_4/naijarc_yor.yaml
@@ -0,0 +1,21 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: '{{story}}
+
+  Based on the above passage, answer the following question:
+
+  {{question.strip()}}
+
+  Choices:
+
+  A) {{options_A}}
+
+  B) {{options_B}}
+
+  C) {{options_C}}
+
+  D) {{options_D}}
+
+  Please provide the correct answer from the choices given:'
+include: naijarc
+task: naijarc_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc
new file mode 100644
index 0000000000000000000000000000000000000000..0aa06d3452b44af6333b30ffd82f5ae610440ec2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc
@@ -0,0 +1,23 @@
+tag:
+    - naijarc_tasks
+    - naijarc_prompt_5
+    - RC_tasks
+dataset_path: Davlan/NaijaRC
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_hau.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6ba82f92825183d3c78079d68cd2a44444dde95
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_hau.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: hau
+doc_to_text: 'Read the passage: {{story}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{options_A}}
+
+  B. {{options_B}}
+
+  C. {{options_C}}
+
+  D. {{options_D}}
+
+  Please choose the correct option from the above list:'
+include: naijarc
+task: naijarc_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_ibo.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b527dc1f70c59aef74de17aa82052978658ddf97
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_ibo.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: ibo
+doc_to_text: 'Read the passage: {{story}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{options_A}}
+
+  B. {{options_B}}
+
+  C. {{options_C}}
+
+  D. {{options_D}}
+
+  Please choose the correct option from the above list:'
+include: naijarc
+task: naijarc_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_yor.yaml b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0959e3277d10fb768565622143eee4e9728fd3c1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/prompt_5/naijarc_yor.yaml
@@ -0,0 +1,19 @@
+# Generated by utils.py
+dataset_name: yor
+doc_to_text: 'Read the passage: {{story}}
+
+  Then answer the question: {{question.strip()}}
+
+  Options:
+
+  A. {{options_A}}
+
+  B. {{options_B}}
+
+  C. {{options_C}}
+
+  D. {{options_D}}
+
+  Please choose the correct option from the above list:'
+include: naijarc
+task: naijarc_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/naijarc/utils.py b/lm_eval/tasks/afrobench/naijarc/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad636a8e882286a7b504e6889c083fb7d8e36ad3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/naijarc/utils.py
@@ -0,0 +1,93 @@
+import argparse
+import os
+
+import yaml
+
+
+def prompt_func(mode, lang):
+    """Return the prompt template registered for ``mode`` (``lang`` is unused)."""
+    return {
+        "prompt_1": "P: {{story}}\nQ: {{question.strip()}}\nA: {{options_A}}\nB: {{options_B}}\nC: {{options_C}}\nD: {{options_D}}\nPlease choose the correct answer from the options above:",
+        "prompt_2": "Passage: {{story}}\nQuestion: {{question.strip()}}\n1: {{options_A}}\n2: {{options_B}}\n3: {{options_C}}\n4: {{options_D}}\nPlease select the correct answer from the given choices:",
+        "prompt_3": "Context: {{story}}\nQuery: {{question.strip()}}\nOption A: {{options_A}}\nOption B: {{options_B}}\nOption C: {{options_C}}\nOption D: {{options_D}}\nPlease indicate the correct option from the list above:",
+        "prompt_4": "{{story}}\nBased on the above passage, answer the following question:\n{{question.strip()}}\nChoices:\nA) {{options_A}}\nB) {{options_B}}\nC) {{options_C}}\nD) {{options_D}}\nPlease provide the correct answer from the choices given:",
+        "prompt_5": "Read the passage: {{story}}\nThen answer the question: {{question.strip()}}\nOptions:\nA. {{options_A}}\nB. {{options_B}}\nC. {{options_C}}\nD. {{options_D}}\nPlease choose the correct option from the above list:",
+    }[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Write one task yaml per NaijaRC language into ``<output_dir>/<mode>/``.
+
+    Every file holds the include, task name, dataset name and prompt text of
+    one language, preceded by a "# Generated by utils.py" header line.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant; names the sub-directory and the task suffix.
+    :raises FileExistsError: After all languages were tried, if at least one
+        file already existed and ``overwrite`` is false.
+    """
+    languages = {
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "yor": "Yoruba",
+    }
+    # "x" makes open() raise FileExistsError rather than truncate a file.
+    open_mode = "w" if overwrite else "x"
+    skipped = []
+
+    for code, language in languages.items():
+        try:
+            file_name = f"naijarc_{code}.yaml"
+            config = {
+                "include": "naijarc",
+                "task": f"naijarc_{code}_{mode}",
+                "dataset_name": code,
+                "doc_to_text": prompt_func(mode, language),
+            }
+            os.makedirs(os.path.join(output_dir, mode), exist_ok=True)
+
+            target = f"{output_dir}/{mode}/{file_name}"
+            with open(target, open_mode, encoding="utf8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(config, f, allow_unicode=True)
+        except FileExistsError:
+            skipped.append(file_name)
+
+    if skipped:
+        # Report all existing files together instead of failing on the first.
+        message = (
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(skipped)}"
+        )
+        raise FileExistsError(message)
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    # A store_true flag defaulting to True can never be False, so offer an opt-out.
+    parser.add_argument(
+        "--no-overwrite",
+        dest="overwrite",
+        action="store_false",
+        help="Fail instead of replacing files that already exist",
+    )
+    parser.add_argument(
+        "--output-dir", default="./", help="Directory to write yaml files to"
+    )
+    modes = ["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"]
+    parser.add_argument("--mode", default="prompt_1", choices=modes, help="Prompt number")
+    args = parser.parse_args()
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()  # executed only when this file is run as a script, not on import
diff --git a/lm_eval/tasks/afrobench/nollysenti/README.md b/lm_eval/tasks/afrobench/nollysenti/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fa2413190b57192fe7a4a4250bf9fb41eb5950a3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/README.md
@@ -0,0 +1,35 @@
+# NollySenti
+
+## Paper
+Title: `NollySenti: Leveraging Transfer Learning and Machine Translation for Nigerian Movie Sentiment Classification`
+
+Paper Link: https://aclanthology.org/2023.acl-short.85/
+
+## Abstract
+>Africa has over 2000 indigenous languages but they are under-represented in NLP research due to lack of datasets. In recent years, there have been progress in developing labelled corpora for African languages. However, they are often available in a single domain and may not generalize to other domains. In this paper, we focus on the task of sentiment classification for cross-domain adaptation. We create a new dataset, Nollywood movie reviews for five languages widely spoken in Nigeria (English, Hausa, Igbo, Nigerian Pidgin, and Yoruba). We provide an extensive empirical evaluation using classical machine learning methods and pre-trained language models. By leveraging transfer learning, we compare the performance of cross-domain adaptation from Twitter domain, and cross-lingual adaptation from English language. Our evaluation shows that transfer from English in the same target domain leads to more than 5% improvement in accuracy compared to transfer from Twitter in the same language. To further mitigate the domain difference, we leverage machine translation from English to other Nigerian languages, which leads to a further improvement of 7% over cross-lingual evaluation. While machine translation to low-resource languages are often of low quality, our analysis shows that sentiment related words are often preserved.
+
+HomePage: https://github.com/IyanuSh/NollySenti
+
+### Citation
+
+```
+@inproceedings{shode-etal-2023-nollysenti,
+    title = "{N}olly{S}enti: Leveraging Transfer Learning and Machine Translation for {N}igerian Movie Sentiment Classification",
+    author = "Shode, Iyanuoluwa  and
+      Adelani, David Ifeoluwa  and
+      Peng, JIng  and
+      Feldman, Anna",
+    editor = "Rogers, Anna  and
+      Boyd-Graber, Jordan  and
+      Okazaki, Naoaki",
+    booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
+    month = jul,
+    year = "2023",
+    address = "Toronto, Canada",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.acl-short.85/",
+    doi = "10.18653/v1/2023.acl-short.85",
+    pages = "986--998",
+    abstract = "Africa has over 2000 indigenous languages but they are under-represented in NLP research due to lack of datasets. In recent years, there have been progress in developing labelled corpora for African languages. However, they are often available in a single domain and may not generalize to other domains. In this paper, we focus on the task of sentiment classification for cross-domain adaptation. We create a new dataset, Nollywood movie reviews for five languages widely spoken in Nigeria (English, Hausa, Igbo, Nigerian Pidgin, and Yoruba). We provide an extensive empirical evaluation using classical machine learning methods and pre-trained language models. By leveraging transfer learning, we compare the performance of cross-domain adaptation from Twitter domain, and cross-lingual adaptation from English language. Our evaluation shows that transfer from English in the same target domain leads to more than 5{\%} improvement in accuracy compared to transfer from Twitter in the same language. To further mitigate the domain difference, we leverage machine translation from English to other Nigerian languages, which leads to a further improvement of 7{\%} over cross-lingual evaluation. While machine translation to low-resource languages are often of low quality, our analysis shows that sentiment related words are often preserved."
+}
+```
diff --git a/lm_eval/tasks/afrobench/nollysenti/nollysenti.yaml b/lm_eval/tasks/afrobench/nollysenti/nollysenti.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7fb1326258af24566aff25c0478f9cba513fd8b7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/nollysenti.yaml
@@ -0,0 +1,13 @@
+group: nollysenti
+task:
+  - nollysenti_prompt_1
+  - nollysenti_prompt_2
+  - nollysenti_prompt_3
+  - nollysenti_prompt_4
+  - nollysenti_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti
new file mode 100644
index 0000000000000000000000000000000000000000..0476cdc0e8a5f5fc3a886423f5b0052c0918b4c9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti
@@ -0,0 +1,38 @@
+tag:
+    - afrobench_sentiment_tasks
+    - nollysenti_prompt_1
+dataset_path: Davlan/nollysenti
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_text: 'Does this movie description "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+doc_to_target: label
+doc_to_choice:
+    - "positive"
+    - "negative"
+should_decontaminate: true
+doc_to_decontamination_query: review
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_eng.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5cf3a85f0dc5b40221d33dedad85f669055f913e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_eng.yaml
@@ -0,0 +1,3 @@
+dataset_name: en
+include: nollysenti
+task: nollysenti_eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_hau.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..157e97dbe5106cdad11dfc3202d08663816f0730
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_hau.yaml
@@ -0,0 +1,3 @@
+dataset_name: ha
+include: nollysenti
+task: nollysenti_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_ibo.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77c9bfd45f08c0876cf19b4da09d6d5cbc29e3c4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_ibo.yaml
@@ -0,0 +1,3 @@
+dataset_name: ig
+include: nollysenti
+task: nollysenti_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_pcm.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..536301688c02f9ca8ef4f576d9874ad624abe8fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_pcm.yaml
@@ -0,0 +1,3 @@
+dataset_name: pcm
+include: nollysenti
+task: nollysenti_pcm_prompt_1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_yor.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6dc1cfabadc7019a92b7d023982641ac60a0b9c2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/nollysenti_yor.yaml
@@ -0,0 +1,3 @@
+dataset_name: yo
+include: nollysenti
+task: nollysenti_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_1/utils.py b/lm_eval/tasks/afrobench/nollysenti/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti
new file mode 100644
index 0000000000000000000000000000000000000000..76f664fee41316e4b8cf10faca4498c1e1c22916
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti
@@ -0,0 +1,37 @@
+tag:
+    - afrobench_sentiment_tasks
+    - nollysenti_prompt_2
+dataset_path: Davlan/nollysenti
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "positive"
+    - "negative"
+should_decontaminate: true
+doc_to_decontamination_query: review
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_eng.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ac3bb04d137a207aad2ac307bd2eefc7e5effc2d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_eng.yaml
@@ -0,0 +1,4 @@
+dataset_name: en
+include: nollysenti
+doc_to_text: 'Does this English movie description; "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+task: nollysenti_eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_hau.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f87bce673c68bacdcf3e516bb58c116ada8209e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_hau.yaml
@@ -0,0 +1,4 @@
+dataset_name: ha
+include: nollysenti
+doc_to_text: 'Does this Hausa movie description; "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+task: nollysenti_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_ibo.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f7ae185dff1e0108d5d4b6d0bd5fa318c3c182b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_ibo.yaml
@@ -0,0 +1,4 @@
+dataset_name: ig
+include: nollysenti
+doc_to_text: 'Does this Igbo movie description; "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+task: nollysenti_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_pcm.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0305c7673fc5f2a527f96205a2b6730efff4db3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_pcm.yaml
@@ -0,0 +1,4 @@
+dataset_name: pcm
+include: nollysenti
+doc_to_text: 'Does this Naija Pidgin movie description; "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+task: nollysenti_pcm_prompt_2
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_yor.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..03c89d8bd05dec45bfc07f5af8c2dc8ed76388ae
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/nollysenti_yor.yaml
@@ -0,0 +1,4 @@
+dataset_name: yo
+include: nollysenti
+doc_to_text: 'Does this Yoruba movie description; "{{review}}" have a Positive or Negative sentiment? Labels only\n'  # NOTE(review): single-quoted YAML does not process escapes, so this ends in a literal backslash-n rather than a newline - confirm intended
+task: nollysenti_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_2/utils.py b/lm_eval/tasks/afrobench/nollysenti/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti
new file mode 100644
index 0000000000000000000000000000000000000000..472928acdc7b964d60fbd0eb992af298319afcc4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti
@@ -0,0 +1,37 @@
+tag:
+    - afrobench_sentiment_tasks
+    - nollysenti_prompt_3
+dataset_path: Davlan/nollysenti
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "positive"
+    - "negative"
+should_decontaminate: true
+doc_to_decontamination_query: review
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_eng.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df21a145c99fb1e7612868276e481724503460bc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: en
+doc_to_text: "You are an assistant able to detect sentiment in movie reviews. \n\nGiven\
+  \ the sentiment labels Positive or Negative; what is the sentiment of the\
+  \ English statement below? Return only the labels\n\nReview: {{review}}\n"
+include: nollysenti
+task: nollysenti_eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_hau.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d15488d6e25022a68dae9874a3b77598fd22dc0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "You are an assistant able to detect sentiment in movie reviews. \n\nGiven\
+  \ the sentiment labels Positive or Negative; what is the sentiment of the\
+  \ Hausa statement below? Return only the labels\n\nReview: {{review}}\n"
+include: nollysenti
+task: nollysenti_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_ibo.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f6bb7b29581858b860b5919afbab5e5b22ebc28
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "You are an assistant able to detect sentiment in movie reviews. \n\nGiven\
+  \ the sentiment labels Positive or Negative; what is the sentiment of the\
+  \ Igbo statement below? Return only the labels\n\nReview: {{review}}\n"
+include: nollysenti
+task: nollysenti_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_pcm.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f98519f3ed329da73ab2272fd33305670d8f2ec1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_pcm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are an assistant able to detect sentiment in movie reviews. \n\nGiven\
+  \ the sentiment labels Positive or Negative; what is the sentiment of the\
+  \ Naija Pidgin statement below? Return only the labels\n\nReview: {{review}}\n"
+include: nollysenti
+task: nollysenti_pcm_prompt_3
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_yor.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd64d1eda4fa7048690527046e71c6af21eb0d51
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/nollysenti_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "You are an assistant able to detect sentiment in movie reviews. \n\nGiven\
+  \ the sentiment labels Positive or Negative; what is the sentiment of the\
+  \ Yoruba statement below? Return only the labels\n\nReview: {{review}}\n"
+include: nollysenti
+task: nollysenti_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_3/utils.py b/lm_eval/tasks/afrobench/nollysenti/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti
new file mode 100644
index 0000000000000000000000000000000000000000..de1bb486dc1c84ea828d1cb99deb16af6e3f1644
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti
@@ -0,0 +1,37 @@
+tag:
+    - afrobench_sentiment_tasks
+    - nollysenti_prompt_4
+dataset_path: Davlan/nollysenti
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "positive"
+    - "negative"
+should_decontaminate: true
+doc_to_decontamination_query: review
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_eng.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8e01ab6efb4450b392b7d6278088c7f74114f61
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: en
+doc_to_text: "Label the following text as Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_eng_prompt_4
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_hau.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abc9570484fbd79acebb9ba2b7be840bb9391c4d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_hau.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "Label the following text as Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_ibo.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8962cf729075203d9c853470791aa15f7eb97023
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "Label the following text as Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_pcm.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..36d43b795461972411b56413b1bc11386cc34d78
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_pcm.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "Label the following text as Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_pcm_prompt_4
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_yor.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c100c4dd367e2d610e8881d0d7d932c3473f38c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/nollysenti_yor.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "Label the following text as Positive, or Negative. Provide\
+  \ only the label as your response. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_4/utils.py b/lm_eval/tasks/afrobench/nollysenti/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti
new file mode 100644
index 0000000000000000000000000000000000000000..2e25f2f088edcb81f754f3b7fd7f9a5e92e18b12
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti
@@ -0,0 +1,37 @@
+tag:
+    - afrobench_sentiment_tasks
+    - nollysenti_prompt_5
+dataset_path: Davlan/nollysenti
+dataset_kwargs: {trust_remote_code: True}
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: label
+doc_to_choice:
+    - "positive"
+    - "negative"
+should_decontaminate: true
+doc_to_decontamination_query: review
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_eng.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d485ffe154c61f91924a5c0015e5defeb8ea83a2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_eng.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: en
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ English text. For each input, classify the sentiment as positive, negative.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_eng_prompt_5
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_hau.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ed16af77a33c39aa1569a38047ef92091837152
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_hau.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: ha
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Hausa text. For each input, classify the sentiment as positive, negative.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input.\n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_ibo.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c75f26900298951c5934b17964ca0cd744d86726
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_ibo.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: ig
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Igbo text. For each input, classify the sentiment as positive, negative.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_pcm.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_pcm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29b5cda0b66b083a2cbcdf8d6750d447e7890519
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_pcm.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: pcm
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Naija Pidgin text. For each input, classify the sentiment as positive, negative.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_pcm_prompt_5
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_yor.yaml b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1aea0284e191356e15db16036a4d1abfbc1c5aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/nollysenti_yor.yaml
@@ -0,0 +1,12 @@
+# Generated by utils.py
+dataset_name: yo
+doc_to_text: "You are tasked with performing sentiment classification on the following\
+  \ Yoruba text. For each input, classify the sentiment as positive, negative.\
+  \ Use the following guidelines: \n\n Positive: The text expresses happiness,\
+  \ satisfaction, or optimism. \nNegative: The text conveys disappointment, dissatisfaction,\
+  \ or pessimism. \n\nIf the text contains both positive and negative sentiments, choose\
+  \ the dominant sentiment. For ambiguous or unclear sentiments, select the label\
+  \ that best reflects the overall tone. Please provide a single classification for\
+  \ each input. \n\ntext: {{review}} \nlabel: \n"
+include: nollysenti
+task: nollysenti_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/nollysenti/prompt_5/utils.py b/lm_eval/tasks/afrobench/nollysenti/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/nollysenti/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/ntrex/README.md b/lm_eval/tasks/afrobench/ntrex/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d68cf8c99cb4d7cb8c68eb7d015e6cb26daca3cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/README.md
@@ -0,0 +1,38 @@
+# NTREX-128
+
+## Paper
+Title: `NTREX-128 – News Test References for MT Evaluation of 128 Languages`
+
+Paper Link: https://aclanthology.org/2022.sumeval-1.4/
+
+## Abstract
+>We release NTREX-128, a data set for machine translation (MT) evaluation from English into a total of 128 target languages. The paper describes the data creation process and proposes a quality filtering method based on human evaluation. We show experimental results which confirm that the directionality of test sets translation indeed plays an important role wrt. the usefulness of the corresponding metrics’ scores. Thus, we recommend that the NTREX-128 data set should be used for evaluation of English-sourced translation models but not in reverse direction. The test set release introduces another benchmark for the evaluation of massively multilingual machine translation research.
+
+HomePage: https://github.com/MicrosoftTranslator/NTREX
+
+### Citation
+
+```
+@inproceedings{federmann-etal-2022-ntrex,
+    title = "{NTREX}-128 {--} News Test References for {MT} Evaluation of 128 Languages",
+    author = "Federmann, Christian  and
+      Kocmi, Tom  and
+      Xin, Ying",
+    editor = "Ahuja, Kabir  and
+      Anastasopoulos, Antonios  and
+      Patra, Barun  and
+      Neubig, Graham  and
+      Choudhury, Monojit  and
+      Dandapat, Sandipan  and
+      Sitaram, Sunayana  and
+      Chaudhary, Vishrav",
+    booktitle = "Proceedings of the First Workshop on Scaling Up Multilingual Evaluation",
+    month = nov,
+    year = "2022",
+    address = "Online",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2022.sumeval-1.4/",
+    doi = "10.18653/v1/2022.sumeval-1.4",
+    pages = "21--24"
+}
+```
diff --git a/lm_eval/tasks/afrobench/ntrex/gen_utils.py b/lm_eval/tasks/afrobench/ntrex/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba549de25b69b0892f6e80c923c44f7ca001cd79
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/gen_utils.py
@@ -0,0 +1,186 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:
+    """Plain holder that stores a single value on ``self.value``.
+
+    NOTE(review): nothing in this script references the class; confirm whether
+    it is still needed before removing it.
+    """
+
+    def __init__(self, value):
+        self.value = value
+
+
+def prompt_func(mode, lang, lang_dict):
+    """Return the prompt template selected by ``mode`` for language ``lang``.
+
+    :param mode: Key of the template: ``prompt_1``..``prompt_3``, optionally
+        suffixed with ``_reverse`` for the English-to-``lang`` direction.
+    :param lang: Language code; used to build the ``sentence_<lang>`` column name.
+    :param lang_dict: Mapping from language code to the name shown in the prompt.
+    :return: Prompt text containing a ``{{sentence_...}}`` placeholder.
+    :raises KeyError: If ``lang`` is missing from ``lang_dict`` or ``mode`` is
+        not a known template key.
+    """
+    language_column_name = f"sentence_{lang}"
+    # Quadrupled braces inside the f-strings emit a literal ``{{`` / ``}}`` pair.
+    prompt_map = {
+        "prompt_1": f"{lang_dict[lang]}: {{{{{language_column_name}}}}} \nEnglish: ",
+        "prompt_1_reverse": f"English: {{{{sentence_eng_Latn}}}} \n{lang_dict[lang]}: ",
+        "prompt_2": f"You are a translation expert. Translate the following {lang_dict[lang]} sentences to English \n"
+        f"{lang_dict[lang]}: {{{{{language_column_name}}}}}\nEnglish: ",
+        "prompt_2_reverse": f"You are a translation expert. Translate the following English sentences to "
+        f"{lang_dict[lang]} "
+        "\nEnglish: {{sentence_eng_Latn}} "
+        f"\n{lang_dict[lang]}: ",
+        "prompt_3": f"As a {lang_dict[lang]} and English linguist, translate the following {lang_dict[lang]} sentences "
+        f"to English \n{lang_dict[lang]}: {{{{{language_column_name}}}}}\nEnglish: ",
+        "prompt_3_reverse": f"As a {lang_dict[lang]} and English linguist, translate the following English sentences to "
+        f"{lang_dict[lang]} "
+        "\nEnglish: {{sentence_eng_Latn}} "
+        f"\n{lang_dict[lang]}: ",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse: bool) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant; also names the sub-directory and task suffix.
+    :param reverse: If true, write English-to-language tasks under
+        ``english-african``; otherwise language-to-English tasks under
+        ``african-english``.
+    :raises FileExistsError: After every language was attempted, if a file
+        already existed and ``overwrite`` is false.
+    """
+    err = []
+    languages = {
+        "afr_Latn": "Afrikaans",
+        "amh_Ethi": "Amharic",
+        "arb_Arab": "Arabic",
+        "bem_Latn": "Bemba",
+        "ewe_Latn": "Ewe",
+        "fra_Latn": "French",
+        "hau_Latn": "Hausa",
+        "ibo_Latn": "Igbo",
+        "kin_Latn": "Kinyarwanda",
+        "mey_Arab": "Hassaniya Arabic",
+        "mlg_Latn": "Malagasy",
+        "msa_Latn": "Malay",
+        "nde_Latn": "North Ndebele",
+        "nso_Latn": "Northern Sotho",
+        "nya_Latn": "Chichewa",
+        "orm_Ethi": "Oromo",
+        "shi_Arab": "Tachelhit",
+        "sna_Latn": "Shona (Latin)",
+        "som_Latn": "Somali",
+        "ssw_Latn": "Swati",
+        "swa_Latn": "Swahili",
+        "tam_Taml": "Tamil",
+        "tel_Telu": "Telugu",
+        "tir_Ethi": "Tigrinya",
+        "ton_Latn": "Tongan",
+        "tsn_Latn": "Tswana",
+        "urd_Arab": "Urdu",
+        "ven_Latn": "Venda",
+        "wol_Latn": "Wolof",
+        "xho_Latn": "Xhosa",
+        "yor_Latn": "Yoruba",
+        "zul_Latn": "Zulu",
+    }
+
+    # Both directions share the write path below; only these names differ.
+    direction_dir = "english-african" if reverse else "african-english"
+
+    for lang in languages:
+        if reverse:
+            pair = f"eng_Latn-{lang}"
+            target_column = f"sentence_{lang}"
+            prompt_mode = f"{mode}_reverse"
+        else:
+            pair = f"{lang}-eng_Latn"
+            target_column = "sentence_eng_Latn"
+            prompt_mode = mode
+        file_name = f"ntrex_{pair}.yaml"
+        yaml_details = {
+            "include": "ntrex",
+            "dataset_name": f"{lang}",
+            "task": f"ntrex_{pair}_{mode}",
+            "doc_to_target": target_column,
+            "doc_to_text": prompt_func(prompt_mode, lang, languages),
+        }
+        try:
+            os.makedirs(f"{output_dir}/{mode}/{direction_dir}", exist_ok=True)
+            with open(
+                f"{output_dir}/{mode}/{direction_dir}/{file_name}",
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            # Keep going so the final error lists every file that was skipped.
+            err.append(file_name)
+
+    if len(err) > 0:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    # Overwriting is on by default, so ``--overwrite`` alone could never turn it
+    # off; ``--no-overwrite`` makes the exclusive-create ("x") mode reachable.
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--no-overwrite",
+        dest="overwrite",
+        action="store_false",
+        help="Fail instead of overwriting files that already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3"],
+        help="Prompt number",
+    )
+    # A plain switch: the former ``choices=[True, False]`` without ``type`` was
+    # checked against the raw argument string, so any supplied value was rejected.
+    parser.add_argument(
+        "--reverse",
+        default=False,
+        action="store_true",
+        help="Reverse the translation direction",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(
+        output_dir=args.output_dir,
+        overwrite=args.overwrite,
+        mode=args.mode,
+        reverse=args.reverse,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/ntrex/ntrex.yaml b/lm_eval/tasks/afrobench/ntrex/ntrex.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c30b08cea2ffdbf775cfeeb8957c47e9e807518a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/ntrex.yaml
@@ -0,0 +1,14 @@
+group: african_ntrex
+task:
+  - ntrex_eng-afr_prompt_1
+  - ntrex_eng-afr_prompt_2
+  - ntrex_eng-afr_prompt_3
+  - ntrex_afr-eng_prompt_1
+  - ntrex_afr-eng_prompt_2
+  - ntrex_afr-eng_prompt_3
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..3c2659d752c9f14412d23f3c1e553fbb03a16b03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex
@@ -0,0 +1,26 @@
+tag:
+- ntrex_tasks
+- ntrex_afr-eng
+- ntrex_afr-eng_prompt_1
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb11904366801d649186548e124027489497a4cb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Afrikaans: {{sentence_afr_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_afr_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0114a212b89bee62243b3adedad49066998d1785
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Amharic: {{sentence_amh_Ethi}} \nEnglish: "
+include: ntrex
+task: ntrex_amh_Ethi-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_arb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_arb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ddc8c4bbd403a3b83c15172d119ae183247c522
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_arb_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Arabic: {{sentence_arb_Arab}} \nEnglish: "
+include: ntrex
+task: ntrex_arb_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c33ab35a18175300ffbf938b2431652ecf86017e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Bemba: {{sentence_bem_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_bem_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5f69c0051ac2292ef1282ac6c8844ee61bc5148
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Ewe: {{sentence_ewe_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_ewe_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa3fad61684684f7155bf40704397cff7d5bcbc8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "French: {{sentence_fra_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_fra_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8b6d0f28b84d4c89d96f3db9de8478201265fade
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Hausa: {{sentence_hau_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_hau_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..992598614c1d9fb0929ca024260a31b953a1204e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Igbo: {{sentence_ibo_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_ibo_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eee96a62b961371c1fd1f069e97cd94ebef5b4d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Kinyarwanda: {{sentence_kin_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_kin_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mey_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mey_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6650e644ad9b84df3c93bb6622543f8984bc4f8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mey_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Hassaniya Arabic: {{sentence_mey_Arab}} \nEnglish: "
+include: ntrex
+task: ntrex_mey_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mlg_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mlg_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..375522c5c8560747a2775ec380b4964296dec7e3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_mlg_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Malagasy: {{sentence_mlg_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_mlg_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_msa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_msa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65aaaa8014abf84963112a1b7f0239f4129c20bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_msa_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Malay: {{sentence_msa_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_msa_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nde_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nde_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d63548fb439470b4d46fb7225fa521f31becc77f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nde_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "North Ndebele: {{sentence_nde_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_nde_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cf1cccf8a2562b0c958457561c7c4c9a5ae6776
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Northern Sotho: {{sentence_nso_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_nso_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee4ac6d73f198367a96c684921e6e65e9a0adea7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Chichewa: {{sentence_nya_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_nya_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_orm_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_orm_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..446873065b536f58bfa12e5886f49edd1b7ea5ee
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_orm_Ethi-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Oromo: {{sentence_orm_Ethi}} \nEnglish: "
+include: ntrex
+task: ntrex_orm_Ethi-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_shi_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_shi_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10972893f3f453f91d12845d9fea3e43558c1fc4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_shi_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tachelhit: {{sentence_shi_Arab}} \nEnglish: "
+include: ntrex
+task: ntrex_shi_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63d83528835e8ae79f82d09007a4494ccaf1229c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Shona (Latin): {{sentence_sna_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_sna_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6eb91e0310fcebd6483a3d43aca793e3a6934b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_som_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Somali: {{sentence_som_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_som_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48c5c10973911aa3b779071ffa96513e1e1f7a7a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Swati: {{sentence_ssw_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_ssw_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_swa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_swa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..863222f7325fab67ff5afe3a13bef0cc0f4df035
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_swa_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Swahili: {{sentence_swa_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_swa_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tam_Taml-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tam_Taml-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..993b480f20e34eab5f1c4cdfb644e09e0e978264
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tam_Taml-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tamil: {{sentence_tam_Taml}} \nEnglish: "
+include: ntrex
+task: ntrex_tam_Taml-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tel_Telu-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tel_Telu-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d91e9a1f762a013ed992d04a2c9e9f0049d8f7eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tel_Telu-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Telugu: {{sentence_tel_Telu}} \nEnglish: "
+include: ntrex
+task: ntrex_tel_Telu-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f23f332c1ca392e44c62638d8e39a79f8839b54d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tigrinya: {{sentence_tir_Ethi}} \nEnglish: "
+include: ntrex
+task: ntrex_tir_Ethi-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ton_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ton_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5676a1a99997aca3d0bfc4120003ccb4edef3099
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ton_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tongan: {{sentence_ton_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_ton_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..caa0f9e57b1d93a4c074cca1c816ded7a93c3eb6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Tswana: {{sentence_tsn_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_tsn_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_urd_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_urd_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e07e6787868ec0a56e7b79b2246fcd2211c19d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_urd_Arab-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Urdu: {{sentence_urd_Arab}} \nEnglish: "
+include: ntrex
+task: ntrex_urd_Arab-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ven_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ven_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ba8ceaf4921b087cf38dce53a8c9bb49c359389
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_ven_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Venda: {{sentence_ven_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_ven_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dcacb69de3f8fd83c5714494665cfb7f8cc7be1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Wolof: {{sentence_wol_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_wol_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b6abc9dcbf53879148418592fd155f95026bba8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Xhosa: {{sentence_xho_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_xho_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e98aecd5b188aabf46c2c00b9a126616fee55f6f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Yoruba: {{sentence_yor_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_yor_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a38abee1148ad1b77a5395afa48621070ad3c239
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/african-english/ntrex_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "Zulu: {{sentence_zul_Latn}} \nEnglish: "
+include: ntrex
+task: ntrex_zul_Latn-eng_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..2b5aa84f990e10804a9cdc8ca69901bfb55e5d71
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex
@@ -0,0 +1,26 @@
+tag:
+- ntrex_tasks
+- ntrex_eng-afr
+- ntrex_eng-afr_prompt_1
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..40471f80151bacf355f8bf8ff617027f9da68ef7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nAfrikaans: "
+include: ntrex
+task: ntrex_eng_Latn-afr_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6e4dfba5dc799649532e9e6b28c862b25afb9566
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "English: {{sentence_eng_Latn}} \nAmharic: "
+include: ntrex
+task: ntrex_eng_Latn-amh_Ethi_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-arb_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-arb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a248a9ac6da1668ce1fab555fb7ad586cf0acaa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-arb_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_arb_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nArabic: "
+include: ntrex
+task: ntrex_eng_Latn-arb_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..035c682256b81ca9cc7dda1aebfc9ac130a75762
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nBemba: "
+include: ntrex
+task: ntrex_eng_Latn-bem_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f5deae5c56b3bb203b372298207e7fa8d79cfb58
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nEwe: "
+include: ntrex
+task: ntrex_eng_Latn-ewe_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf079cf440f75a35edbea04e8afa0703ab0eea7b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nFrench: "
+include: ntrex
+task: ntrex_eng_Latn-fra_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..377acbfb8ef84be01d8657907a33d1f141b66795
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nHausa: "
+include: ntrex
+task: ntrex_eng_Latn-hau_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c3a14dfa2200c29eb83825a6efb202905e6e78f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nIgbo: "
+include: ntrex
+task: ntrex_eng_Latn-ibo_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec14399e37649d7671f81f5348d74e76235df4e3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: ntrex
+task: ntrex_eng_Latn-kin_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mey_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mey_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb696cc5ac25f1f43c276c34e26b97b7c82efaee
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mey_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_mey_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nHassaniya Arabic: "
+include: ntrex
+task: ntrex_eng_Latn-mey_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mlg_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mlg_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..035c98c373ff6738310cb280cd617df60c8b6a2a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-mlg_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_mlg_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nMalagasy: "
+include: ntrex
+task: ntrex_eng_Latn-mlg_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-msa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-msa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4c6b7d7f1f904ce5fe6061eb7c4c8caef86a8af
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-msa_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_msa_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nMalay: "
+include: ntrex
+task: ntrex_eng_Latn-msa_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nde_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nde_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c66b44beee186f47ea9f8b4d62776d60e4be3ba9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nde_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_nde_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNorth Ndebele: "
+include: ntrex
+task: ntrex_eng_Latn-nde_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ab6cf296c3959910f99b2edd6354d49259da7ce4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: ntrex
+task: ntrex_eng_Latn-nso_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74cbd1ffed9675feaff5ead68f147fc2572b4edd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nChichewa: "
+include: ntrex
+task: ntrex_eng_Latn-nya_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-orm_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-orm_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad875cab5b7012caecd06b99a8d7047ad50c403c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-orm_Ethi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_orm_Ethi
+doc_to_text: "English: {{sentence_eng_Latn}} \nOromo: "
+include: ntrex
+task: ntrex_eng_Latn-orm_Ethi_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-shi_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-shi_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5441bbdb6ea535f01c71753b9df5ee3290a7cac3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-shi_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_shi_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nTachelhit: "
+include: ntrex
+task: ntrex_eng_Latn-shi_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0bed0f6c195e7945329b7d26b50bb5d2abd62c90
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nShona (Latin): "
+include: ntrex
+task: ntrex_eng_Latn-sna_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e4aafdfc79bd2e31747847ec081ae15f3799dc3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-som_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSomali: "
+include: ntrex
+task: ntrex_eng_Latn-som_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa18ebf233e0cdbfd5b7d692356f0eacc1cf669a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSwati: "
+include: ntrex
+task: ntrex_eng_Latn-ssw_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-swa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-swa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a7079ec01354ca1d56fa593c4b2a5dab668f5c0c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-swa_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_swa_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nSwahili: "
+include: ntrex
+task: ntrex_eng_Latn-swa_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tam_Taml.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tam_Taml.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b7e42a36beee8d83d057b6daf7b6cfa488b2d90f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tam_Taml.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_tam_Taml
+doc_to_text: "English: {{sentence_eng_Latn}} \nTamil: "
+include: ntrex
+task: ntrex_eng_Latn-tam_Taml_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tel_Telu.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tel_Telu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db8eb6b20ef17fb518b1c45a8753e72f205a7e41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tel_Telu.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_tel_Telu
+doc_to_text: "English: {{sentence_eng_Latn}} \nTelugu: "
+include: ntrex
+task: ntrex_eng_Latn-tel_Telu_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45c6ae84c642d58db1ebdbf45feb112c4e872bea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "English: {{sentence_eng_Latn}} \nTigrinya: "
+include: ntrex
+task: ntrex_eng_Latn-tir_Ethi_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ton_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ton_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a680a2c67f5226248043a9d8325b94f7fa4ad57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ton_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_ton_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTongan: "
+include: ntrex
+task: ntrex_eng_Latn-ton_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5a7a4ca261a1b8bfcdd1614eaa167c81c46c1d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nTswana: "
+include: ntrex
+task: ntrex_eng_Latn-tsn_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-urd_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-urd_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ee69ded9fac3efbc400bbf39aadb529eee26e3d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-urd_Arab.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_urd_Arab
+doc_to_text: "English: {{sentence_eng_Latn}} \nUrdu: "
+include: ntrex
+task: ntrex_eng_Latn-urd_Arab_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ven_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ven_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4277ce08a5d44f22996d704e0bfbd7461103a0ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-ven_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_ven_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nVenda: "
+include: ntrex
+task: ntrex_eng_Latn-ven_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dea533ee5e959705c664d5b6e2ee10244c81d3f1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nWolof: "
+include: ntrex
+task: ntrex_eng_Latn-wol_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62ab64bfd5e1a6d7a92e1491824047c4853b7e56
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nXhosa: "
+include: ntrex
+task: ntrex_eng_Latn-xho_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d96624ae3b9faeacd9b13bf8dcbaf95dafd1040
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nYoruba: "
+include: ntrex
+task: ntrex_eng_Latn-yor_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..db60fb59821685f837f5f184647564f3e18f4927
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_1/english-african/ntrex_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "English: {{sentence_eng_Latn}} \nZulu: "
+include: ntrex
+task: ntrex_eng_Latn-zul_Latn_prompt_1
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..3dc29226bf4677ee34836dbc0c5c206cbb1744bd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex
@@ -0,0 +1,25 @@
+tag:
+- ntrex_afr-eng
+- ntrex_afr-eng_prompt_2
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16cfc7d5d0811aec8fca3bcbc7a436f74391cda5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Afrikaans sentences\
+  \ to English \nAfrikaans: {{sentence_afr_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_afr_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20e88c366d9c477928abda6bebd2a73d26d00e36
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Amharic sentences\
+  \ to English \nAmharic: {{sentence_amh_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_amh_Ethi-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_arb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_arb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a88a478a12a99d5910360dab8b6fa6fac1b78601
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_arb_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Arabic sentences\
+  \ to English \nArabic: {{sentence_arb_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_arb_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3e114a3464d6cb98baf2374ccaacbc45c3f91240
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Bemba sentences\
+  \ to English \nBemba: {{sentence_bem_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_bem_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5e4facd5106291d0fe52d5315d1f6a88a6f32afe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Ewe sentences\
+  \ to English \nEwe: {{sentence_ewe_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ewe_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad46aedf727a431a166cac1b9ec45be707feb9bb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following French sentences\
+  \ to English \nFrench: {{sentence_fra_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_fra_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..018a63963e8aeedeb3457a49cbf3d97adf4e8c82
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Hausa sentences\
+  \ to English \nHausa: {{sentence_hau_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_hau_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0b93d2d863d60ded18b4e746badafe81e9a3e917
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Igbo sentences\
+  \ to English \nIgbo: {{sentence_ibo_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ibo_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..45b18a640b749e848a8d7df9c01ac2121afb5c2d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Kinyarwanda sentences\
+  \ to English \nKinyarwanda: {{sentence_kin_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_kin_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mey_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mey_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d155b62c828b30e1505e194d3a93960ed707c1aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mey_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Hassaniya Arabic\
+  \ sentences to English \nHassaniya Arabic: {{sentence_mey_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_mey_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mlg_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mlg_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10a7507bae076af1c5aec92ec0db65da9b94f876
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_mlg_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Malagasy sentences\
+  \ to English \nMalagasy: {{sentence_mlg_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_mlg_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_msa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_msa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be65a0ff07f372df2e3027373aebd4e0176e14ee
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_msa_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Malay sentences\
+  \ to English \nMalay: {{sentence_msa_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_msa_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nde_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nde_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4a39fc2c31bc63eb27fdbfb78edaa8c8c59e0ee
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nde_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following North Ndebele\
+  \ sentences to English \nNorth Ndebele: {{sentence_nde_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nde_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..290122fab7df120e79d478e81d3cc39cc60e61fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Northern Sotho\
+  \ sentences to English \nNorthern Sotho: {{sentence_nso_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nso_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de365e011b86d650b6defb5a6fd7abb4a7a0feef
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Chichewa sentences\
+  \ to English \nChichewa: {{sentence_nya_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nya_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_orm_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_orm_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ebe353d1fc9de1f7f89e4f783a57afe0a3699e7f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_orm_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Oromo sentences\
+  \ to English \nOromo: {{sentence_orm_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_orm_Ethi-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_shi_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_shi_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2db11ae54e39b0e8a5c5669489fdcd5a81bce29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_shi_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tachelhit sentences\
+  \ to English \nTachelhit: {{sentence_shi_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_shi_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25600d6347d1973e1a3c4c8f236093044f2f83fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Shona (Latin)\
+  \ sentences to English \nShona (Latin): {{sentence_sna_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_sna_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea6a71d9a423fad3ff285bbc53b3d7f440fac57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_som_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Somali sentences\
+  \ to English \nSomali: {{sentence_som_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_som_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2e690a6a49b0e355df71f413522e6905f7601d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Swati sentences\
+  \ to English \nSwati: {{sentence_ssw_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ssw_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_swa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_swa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e609435f8482f5e2f4daa5253fef21dedcf36a3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_swa_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Swahili sentences\
+  \ to English \nSwahili: {{sentence_swa_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_swa_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tam_Taml-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tam_Taml-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2c9f278f1353e66e341329e7fc9686169ab309e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tam_Taml-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tamil sentences\
+  \ to English \nTamil: {{sentence_tam_Taml}}\nEnglish: "
+include: ntrex
+task: ntrex_tam_Taml-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tel_Telu-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tel_Telu-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..15dc359805fb8570302d31ac432f5fd557cca2b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tel_Telu-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Telugu sentences\
+  \ to English \nTelugu: {{sentence_tel_Telu}}\nEnglish: "
+include: ntrex
+task: ntrex_tel_Telu-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f0bb2b835f9ad4e577b7e415ab7cfea484c1f44
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tigrinya sentences\
+  \ to English \nTigrinya: {{sentence_tir_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_tir_Ethi-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ton_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ton_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84f7d281eea533cb634ea958922ca7041a6e24f8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ton_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tongan sentences\
+  \ to English \nTongan: {{sentence_ton_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ton_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a56996418d193d01df084b07b144f240ec45e7b7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Tswana sentences\
+  \ to English \nTswana: {{sentence_tsn_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_tsn_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_urd_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_urd_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47a47875969c6bb7d8032570beee3dcc4303b734
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_urd_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Urdu sentences\
+  \ to English \nUrdu: {{sentence_urd_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_urd_Arab-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ven_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ven_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f27b185d8d5c2c2fc0e7b4eb273f5fd8c2241ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_ven_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Venda sentences\
+  \ to English \nVenda: {{sentence_ven_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ven_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa2da55ce1e8cc1b76e87dda701ae9e12e2976b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Wolof sentences\
+  \ to English \nWolof: {{sentence_wol_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_wol_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b504cd3126d1a422f94c10b7677c7bd92f0d9311
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Xhosa sentences\
+  \ to English \nXhosa: {{sentence_xho_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_xho_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..03c4cbacd791035cd1757d5ca0ed14b546b445e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Yoruba sentences\
+  \ to English \nYoruba: {{sentence_yor_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_yor_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..760abb6f0253c993c37413c88c8dfced632cdd84
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/african-english/ntrex_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "You are a translation expert. Translate the following Zulu sentences\
+  \ to English \nZulu: {{sentence_zul_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_zul_Latn-eng_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..8dd411c3b78988b12ea421df33cf6aaa6caee91c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex
@@ -0,0 +1,25 @@
+tag:
+- ntrex_eng-afr
+- ntrex_eng-afr_prompt_2
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..678e5b21721a6da7f67401d2c26f65c89e3bbf83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Afrikaans \nEnglish: {{sentence_eng_Latn}} \nAfrikaans: "
+include: ntrex
+task: ntrex_eng_Latn-afr_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5ae3dd1acdea962bb7f99533ddbe7255133a97b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Amharic \nEnglish: {{sentence_eng_Latn}} \nAmharic: "
+include: ntrex
+task: ntrex_eng_Latn-amh_Ethi_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-arb_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-arb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..303ccf471d5c180220c3985909377ba0227bbe41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-arb_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_arb_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Arabic \nEnglish: {{sentence_eng_Latn}} \nArabic: "
+include: ntrex
+task: ntrex_eng_Latn-arb_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7992529aebb8b6ae9e2e55d2cfb89e142d047791
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Bemba \nEnglish: {{sentence_eng_Latn}} \nBemba: "
+include: ntrex
+task: ntrex_eng_Latn-bem_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3de8e8ebf4d0d15d24a0313d1793f26f7719167d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Ewe \nEnglish: {{sentence_eng_Latn}} \nEwe: "
+include: ntrex
+task: ntrex_eng_Latn-ewe_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc230efe275e4712c7453f77d66290f44702b75d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to French \nEnglish: {{sentence_eng_Latn}} \nFrench: "
+include: ntrex
+task: ntrex_eng_Latn-fra_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..009d1a51893395026f0ed3d3f93e1a16c50abacc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Hausa \nEnglish: {{sentence_eng_Latn}} \nHausa: "
+include: ntrex
+task: ntrex_eng_Latn-hau_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2b27ab59197ab7a8b7069e83ac2186cac6d1510
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Igbo \nEnglish: {{sentence_eng_Latn}} \nIgbo: "
+include: ntrex
+task: ntrex_eng_Latn-ibo_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f76077d3af55e07910b0c3aff74d400b3500b530
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Kinyarwanda \nEnglish: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: ntrex
+task: ntrex_eng_Latn-kin_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mey_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mey_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c5b2abaedbf8808a1ee5d15af9c3be837cbe63e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mey_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_mey_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Hassaniya Arabic \nEnglish: {{sentence_eng_Latn}} \nHassaniya Arabic: "
+include: ntrex
+task: ntrex_eng_Latn-mey_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mlg_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mlg_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d25afa1a04a13d8a3bf6e911ac151e7ac1da51f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-mlg_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_mlg_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Malagasy \nEnglish: {{sentence_eng_Latn}} \nMalagasy: "
+include: ntrex
+task: ntrex_eng_Latn-mlg_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-msa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-msa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7b7972bdaa207e0a34812496a40b8524da0305b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-msa_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_msa_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Malay \nEnglish: {{sentence_eng_Latn}} \nMalay: "
+include: ntrex
+task: ntrex_eng_Latn-msa_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nde_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nde_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..31252c02f9f29312c18039903aa67f26e95499b1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nde_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_nde_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to North Ndebele \nEnglish: {{sentence_eng_Latn}} \nNorth Ndebele: "
+include: ntrex
+task: ntrex_eng_Latn-nde_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b8daba4d8fc456e8a54fe14296d5762be002c3fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Northern Sotho \nEnglish: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: ntrex
+task: ntrex_eng_Latn-nso_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe01ef879c5c52414bc39372103da5c5bff038fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Chichewa \nEnglish: {{sentence_eng_Latn}} \nChichewa: "
+include: ntrex
+task: ntrex_eng_Latn-nya_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-orm_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-orm_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f78e4db60165e893242dea30d04503e5ae46ffb9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-orm_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_orm_Ethi
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Oromo \nEnglish: {{sentence_eng_Latn}} \nOromo: "
+include: ntrex
+task: ntrex_eng_Latn-orm_Ethi_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-shi_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-shi_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57ea6c0480bc9bb9458cc4d6fa92215d67a518b0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-shi_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_shi_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tachelhit \nEnglish: {{sentence_eng_Latn}} \nTachelhit: "
+include: ntrex
+task: ntrex_eng_Latn-shi_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..399668d33c648f472ea6d980f8ebf2e659726b65
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Shona (Latin) \nEnglish: {{sentence_eng_Latn}} \nShona (Latin): "
+include: ntrex
+task: ntrex_eng_Latn-sna_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8a29749aaeeda6c166b3a1cfecf843ef2f2ddfb3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-som_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Somali \nEnglish: {{sentence_eng_Latn}} \nSomali: "
+include: ntrex
+task: ntrex_eng_Latn-som_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a67ffdeee1465a9cd19b126e3a53a0e6ac054d05
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Swati \nEnglish: {{sentence_eng_Latn}} \nSwati: "
+include: ntrex
+task: ntrex_eng_Latn-ssw_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-swa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-swa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0be54151da2d262039dd2c77753f0def8810e528
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-swa_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_swa_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Swahili \nEnglish: {{sentence_eng_Latn}} \nSwahili: "
+include: ntrex
+task: ntrex_eng_Latn-swa_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tam_Taml.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tam_Taml.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..387ac60dafe76aaf13adde1adb9830613172054a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tam_Taml.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_tam_Taml
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tamil \nEnglish: {{sentence_eng_Latn}} \nTamil: "
+include: ntrex
+task: ntrex_eng_Latn-tam_Taml_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tel_Telu.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tel_Telu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b1de396146f0c21caab39fea111fadfd53fce53
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tel_Telu.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_tel_Telu
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Telugu \nEnglish: {{sentence_eng_Latn}} \nTelugu: "
+include: ntrex
+task: ntrex_eng_Latn-tel_Telu_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da402211718e0126e2281d32f1991c946b2a23fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tigrinya \nEnglish: {{sentence_eng_Latn}} \nTigrinya: "
+include: ntrex
+task: ntrex_eng_Latn-tir_Ethi_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ton_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ton_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8c466929bee40fb7ba2f5b000310925908251fd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ton_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_ton_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tongan \nEnglish: {{sentence_eng_Latn}} \nTongan: "
+include: ntrex
+task: ntrex_eng_Latn-ton_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ca918e1de6790bff10cabd103e546651643686d3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Tswana \nEnglish: {{sentence_eng_Latn}} \nTswana: "
+include: ntrex
+task: ntrex_eng_Latn-tsn_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-urd_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-urd_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8539df766542c9a7263ad62992b5fe619de2f23e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-urd_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_urd_Arab
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Urdu \nEnglish: {{sentence_eng_Latn}} \nUrdu: "
+include: ntrex
+task: ntrex_eng_Latn-urd_Arab_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ven_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ven_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e923b12ce695253b39965bda6352121271514123
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-ven_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_ven_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Venda \nEnglish: {{sentence_eng_Latn}} \nVenda: "
+include: ntrex
+task: ntrex_eng_Latn-ven_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..707b76a59f0bd3a661dfac59eb9413c46d323c8b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Wolof \nEnglish: {{sentence_eng_Latn}} \nWolof: "
+include: ntrex
+task: ntrex_eng_Latn-wol_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7f51491f88c75f9d2da270209fbe32b56bc529b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Xhosa \nEnglish: {{sentence_eng_Latn}} \nXhosa: "
+include: ntrex
+task: ntrex_eng_Latn-xho_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f3e4be543796276d04c65001199521701f02ed9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Yoruba \nEnglish: {{sentence_eng_Latn}} \nYoruba: "
+include: ntrex
+task: ntrex_eng_Latn-yor_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..946d0020ddb845653bc574e7cb8de54bf3a35a00
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_2/english-african/ntrex_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Zulu \nEnglish: {{sentence_eng_Latn}} \nZulu: "
+include: ntrex
+task: ntrex_eng_Latn-zul_Latn_prompt_2
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..3bab54d824d83e7d201107a00411c22b5ec44a1b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex
@@ -0,0 +1,25 @@
+tag:
+- ntrex_afr-eng
+- ntrex_afr-eng_prompt_3
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_afr_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_afr_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09cbbfc56e84748c37d86366a68162b82869d918
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_afr_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As an Afrikaans and English linguist, translate the following Afrikaans\
+  \ sentences to English \nAfrikaans: {{sentence_afr_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_afr_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_amh_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_amh_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33530440f5e232fa8b86267e8c42fab503d0c551
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_amh_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Amharic and English linguist, translate the following Amharic sentences\
+  \ to English \nAmharic: {{sentence_amh_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_amh_Ethi-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_arb_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_arb_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..858c0605f39ec13ecbed5733fad5d5eef3d275ad
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_arb_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Arabic and English linguist, translate the following Arabic sentences\
+  \ to English \nArabic: {{sentence_arb_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_arb_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_bem_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_bem_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3be00855b6bfb5318442ebf3603c6c611f1c319c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_bem_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Bemba and English linguist, translate the following Bemba sentences\
+  \ to English \nBemba: {{sentence_bem_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_bem_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ewe_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ewe_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..493176a7baed9c8b7ad64e6e028ce3b00d8a1067
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ewe_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Ewe and English linguist, translate the following Ewe sentences\
+  \ to English \nEwe: {{sentence_ewe_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ewe_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_fra_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_fra_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b009a37bfd786707c077c55b391bacba7e6dad15
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_fra_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a French and English linguist, translate the following French sentences\
+  \ to English \nFrench: {{sentence_fra_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_fra_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_hau_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_hau_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3c6f72111e504082a34674e05670288b6877d3c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_hau_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Hausa and English linguist, translate the following Hausa sentences\
+  \ to English \nHausa: {{sentence_hau_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_hau_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ibo_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ibo_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4b7e768d4c4f4719842aa11af37f6afd89d4f9d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ibo_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Igbo and English linguist, translate the following Igbo sentences\
+  \ to English \nIgbo: {{sentence_ibo_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ibo_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_kin_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_kin_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bba2d32395d22f86492720856e1801d586cab8ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_kin_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Kinyarwanda and English linguist, translate the following Kinyarwanda\
+  \ sentences to English \nKinyarwanda: {{sentence_kin_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_kin_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mey_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mey_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a567548787c0b007306ef41762b9934eb1ad36e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mey_Arab-eng_Latn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Hassaniya Arabic and English linguist, translate the following\
+  \ Hassaniya Arabic sentences to English \nHassaniya Arabic: {{sentence_mey_Arab}}\n\
+  English: "
+include: ntrex
+task: ntrex_mey_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mlg_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mlg_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..23d55c5a0b0f86bb04087ca00bb23c1970ad1fbd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_mlg_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Malagasy and English linguist, translate the following Malagasy\
+  \ sentences to English \nMalagasy: {{sentence_mlg_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_mlg_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_msa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_msa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa1a9618f29da1bbcc7171fda71629d593cada91
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_msa_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Malay and English linguist, translate the following Malay sentences\
+  \ to English \nMalay: {{sentence_msa_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_msa_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nde_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nde_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..855defd07be478e59de82c3eccb19e42dd07f042
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nde_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a North Ndebele and English linguist, translate the following North\
+  \ Ndebele sentences to English \nNorth Ndebele: {{sentence_nde_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nde_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nso_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nso_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29a7452c39392d1aa94bf3db22ed0ee9b62dd120
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nso_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Northern Sotho and English linguist, translate the following Northern\
+  \ Sotho sentences to English \nNorthern Sotho: {{sentence_nso_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nso_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nya_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nya_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..505586519ea0ea567364ae76d5db58fdec05da08
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_nya_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Chichewa and English linguist, translate the following Chichewa\
+  \ sentences to English \nChichewa: {{sentence_nya_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_nya_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_orm_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_orm_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a38e9312cdb30b6bc62b2d3f23c1e5583f043b6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_orm_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Oromo and English linguist, translate the following Oromo sentences\
+  \ to English \nOromo: {{sentence_orm_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_orm_Ethi-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_shi_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_shi_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19f363ef2439a4986282116f9a34026205ebd431
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_shi_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tachelhit and English linguist, translate the following Tachelhit\
+  \ sentences to English \nTachelhit: {{sentence_shi_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_shi_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_sna_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_sna_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c7a63157eca8bdc6f8e2488fc1ef10b9941dbb9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_sna_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Shona (Latin) and English linguist, translate the following Shona\
+  \ (Latin) sentences to English \nShona (Latin): {{sentence_sna_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_sna_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_som_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_som_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..685f38233c655048cb55819812247eefaea19527
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_som_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Somali and English linguist, translate the following Somali sentences\
+  \ to English \nSomali: {{sentence_som_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_som_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ssw_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ssw_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd95665f6f17301764a1f1ad0d525352fd8f69bd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ssw_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Swati and English linguist, translate the following Swati sentences\
+  \ to English \nSwati: {{sentence_ssw_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ssw_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_swa_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_swa_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0731d37346153f0d9dec96c8211f7a8250ec3f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_swa_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Swahili and English linguist, translate the following Swahili sentences\
+  \ to English \nSwahili: {{sentence_swa_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_swa_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tam_Taml-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tam_Taml-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..834320d846a40fcf6bd53c9445f051c38c38a439
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tam_Taml-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tamil and English linguist, translate the following Tamil sentences\
+  \ to English \nTamil: {{sentence_tam_Taml}}\nEnglish: "
+include: ntrex
+task: ntrex_tam_Taml-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tel_Telu-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tel_Telu-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7fde743dcfd343546dcaa042bb3fef8a49a194d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tel_Telu-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Telugu and English linguist, translate the following Telugu sentences\
+  \ to English \nTelugu: {{sentence_tel_Telu}}\nEnglish: "
+include: ntrex
+task: ntrex_tel_Telu-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tir_Ethi-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tir_Ethi-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60189ee73207fc08911821188f61e23eb12dc62e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tir_Ethi-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tigrinya and English linguist, translate the following Tigrinya\
+  \ sentences to English \nTigrinya: {{sentence_tir_Ethi}}\nEnglish: "
+include: ntrex
+task: ntrex_tir_Ethi-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ton_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ton_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec2b5ba992a535f5f5f4fd6b269653b213f1b39a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ton_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tongan and English linguist, translate the following Tongan sentences\
+  \ to English \nTongan: {{sentence_ton_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ton_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tsn_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tsn_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fa63ca4b77edb7b0907e6660ce31df7ce0ea7278
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_tsn_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Tswana and English linguist, translate the following Tswana sentences\
+  \ to English \nTswana: {{sentence_tsn_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_tsn_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_urd_Arab-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_urd_Arab-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2b520795f2fd8e986b8292b985e36769c76f3553
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_urd_Arab-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Urdu and English linguist, translate the following Urdu sentences\
+  \ to English \nUrdu: {{sentence_urd_Arab}}\nEnglish: "
+include: ntrex
+task: ntrex_urd_Arab-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ven_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ven_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82372de2dd0624f9b068f27ab24f48433267ea28
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_ven_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Venda and English linguist, translate the following Venda sentences\
+  \ to English \nVenda: {{sentence_ven_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_ven_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_wol_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_wol_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ae0124f20efadbb363c017ef708b5dfb14311b07
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_wol_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Wolof and English linguist, translate the following Wolof sentences\
+  \ to English \nWolof: {{sentence_wol_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_wol_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_xho_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_xho_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f0528af4efc5cb15035158a7c5789878eaa653b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_xho_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Xhosa and English linguist, translate the following Xhosa sentences\
+  \ to English \nXhosa: {{sentence_xho_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_xho_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_yor_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_yor_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99d7cf494376be71148044b251c23c7b6f15191d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_yor_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Yoruba and English linguist, translate the following Yoruba sentences\
+  \ to English \nYoruba: {{sentence_yor_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_yor_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_zul_Latn-eng_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_zul_Latn-eng_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..30f3b307eef0a64f137cd993ff8571b103b2e91e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/african-english/ntrex_zul_Latn-eng_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_eng_Latn
+doc_to_text: "As a Zulu and English linguist, translate the following Zulu sentences\
+  \ to English \nZulu: {{sentence_zul_Latn}}\nEnglish: "
+include: ntrex
+task: ntrex_zul_Latn-eng_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex
new file mode 100644
index 0000000000000000000000000000000000000000..d001e1f6e6acc14616603aa46a9f412d7abc026b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex
@@ -0,0 +1,25 @@
+tag:
+- ntrex_eng-afr
+- ntrex_eng-afr_prompt_3
+- afrobench_MT_tasks
+dataset_path: masakhane/ntrex_african
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: test
+fewshot_split: test
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+repeats: 1
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-afr_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-afr_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4aaa928ba0d31ca83a7d7eb59462a14715a2abf7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-afr_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_target: sentence_afr_Latn
+doc_to_text: "As a Afrikaans and English linguist, translate the following English\
+  \ sentences to Afrikaans \nEnglish: {{sentence_eng_Latn}} \nAfrikaans: "
+include: ntrex
+task: ntrex_eng_Latn-afr_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-amh_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-amh_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..008f73024a1c7136ba9c7db28badce24097da5d2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-amh_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_target: sentence_amh_Ethi
+doc_to_text: "As a Amharic and English linguist, translate the following English sentences\
+  \ to Amharic \nEnglish: {{sentence_eng_Latn}} \nAmharic: "
+include: ntrex
+task: ntrex_eng_Latn-amh_Ethi_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-arb_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-arb_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0c9e8132374542c605789269c27aabf181dad28
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-arb_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arb_Arab
+doc_to_target: sentence_arb_Arab
+doc_to_text: "As a Arabic and English linguist, translate the following English sentences\
+  \ to Arabic \nEnglish: {{sentence_eng_Latn}} \nArabic: "
+include: ntrex
+task: ntrex_eng_Latn-arb_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-bem_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-bem_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e4ab2af30cba88d413b0c99a868b52614921aed8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-bem_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_target: sentence_bem_Latn
+doc_to_text: "As a Bemba and English linguist, translate the following English sentences\
+  \ to Bemba \nEnglish: {{sentence_eng_Latn}} \nBemba: "
+include: ntrex
+task: ntrex_eng_Latn-bem_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ewe_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ewe_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1c99ad06add81beb54d8b0e3b0d97a987bd2d70
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ewe_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_target: sentence_ewe_Latn
+doc_to_text: "As a Ewe and English linguist, translate the following English sentences\
+  \ to Ewe \nEnglish: {{sentence_eng_Latn}} \nEwe: "
+include: ntrex
+task: ntrex_eng_Latn-ewe_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-fra_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-fra_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3668db57aa9a1b9431dec109be78bf98e0080962
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-fra_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_target: sentence_fra_Latn
+doc_to_text: "As a French and English linguist, translate the following English sentences\
+  \ to French \nEnglish: {{sentence_eng_Latn}} \nFrench: "
+include: ntrex
+task: ntrex_eng_Latn-fra_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-hau_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-hau_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6bca042cb417c3511cf4e8fb442c61239f010a12
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-hau_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_target: sentence_hau_Latn
+doc_to_text: "As a Hausa and English linguist, translate the following English sentences\
+  \ to Hausa \nEnglish: {{sentence_eng_Latn}} \nHausa: "
+include: ntrex
+task: ntrex_eng_Latn-hau_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ibo_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ibo_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c23fcce8fee0b0767977b865af9f24eb27396384
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ibo_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_target: sentence_ibo_Latn
+doc_to_text: "As a Igbo and English linguist, translate the following English sentences\
+  \ to Igbo \nEnglish: {{sentence_eng_Latn}} \nIgbo: "
+include: ntrex
+task: ntrex_eng_Latn-ibo_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-kin_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-kin_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b0041bfb4a44480f142af0c1b9ea37ccd9a47663
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-kin_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_target: sentence_kin_Latn
+doc_to_text: "As a Kinyarwanda and English linguist, translate the following English\
+  \ sentences to Kinyarwanda \nEnglish: {{sentence_eng_Latn}} \nKinyarwanda: "
+include: ntrex
+task: ntrex_eng_Latn-kin_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mey_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mey_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..435df83d9fe3e56a2a75cab98df058c54fd5a8a0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mey_Arab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: mey_Arab
+doc_to_target: sentence_mey_Arab
+doc_to_text: "As a Hassaniya Arabic and English linguist, translate the following\
+  \ English sentences to Hassaniya Arabic \nEnglish: {{sentence_eng_Latn}} \nHassaniya\
+  \ Arabic: "
+include: ntrex
+task: ntrex_eng_Latn-mey_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mlg_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mlg_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..74f92d92668d1b5a9539c503fd3aa5c687988ed8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-mlg_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mlg_Latn
+doc_to_target: sentence_mlg_Latn
+doc_to_text: "As a Malagasy and English linguist, translate the following English\
+  \ sentences to Malagasy \nEnglish: {{sentence_eng_Latn}} \nMalagasy: "
+include: ntrex
+task: ntrex_eng_Latn-mlg_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-msa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-msa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc9a3245f365cdb7c03e5d67e45a9bb236b6477f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-msa_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: msa_Latn
+doc_to_target: sentence_msa_Latn
+doc_to_text: "As a Malay and English linguist, translate the following English sentences\
+  \ to Malay \nEnglish: {{sentence_eng_Latn}} \nMalay: "
+include: ntrex
+task: ntrex_eng_Latn-msa_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nde_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nde_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f7cf092447f290829c9ac3bbbcbb49d915543f26
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nde_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nde_Latn
+doc_to_target: sentence_nde_Latn
+doc_to_text: "As a North Ndebele and English linguist, translate the following English\
+  \ sentences to North Ndebele \nEnglish: {{sentence_eng_Latn}} \nNorth Ndebele: "
+include: ntrex
+task: ntrex_eng_Latn-nde_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nso_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nso_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d52c1ef1f9c4a1ba82ae0f0722669fbf126569f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nso_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_target: sentence_nso_Latn
+doc_to_text: "As a Northern Sotho and English linguist, translate the following English\
+  \ sentences to Northern Sotho \nEnglish: {{sentence_eng_Latn}} \nNorthern Sotho: "
+include: ntrex
+task: ntrex_eng_Latn-nso_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nya_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nya_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a3d395516d48af64ed67d177cd0fa8b28fd9a46
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-nya_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_target: sentence_nya_Latn
+doc_to_text: "As a Chichewa and English linguist, translate the following English\
+  \ sentences to Chichewa \nEnglish: {{sentence_eng_Latn}} \nChichewa: "
+include: ntrex
+task: ntrex_eng_Latn-nya_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-orm_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-orm_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3de07b02307696d09c97eec6120b69580dffade
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-orm_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: orm_Ethi
+doc_to_target: sentence_orm_Ethi
+doc_to_text: "As a Oromo and English linguist, translate the following English sentences\
+  \ to Oromo \nEnglish: {{sentence_eng_Latn}} \nOromo: "
+include: ntrex
+task: ntrex_eng_Latn-orm_Ethi_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-shi_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-shi_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e193c7a3b5a73495c70dcd4176288a79cd6eb2c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-shi_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: shi_Arab
+doc_to_target: sentence_shi_Arab
+doc_to_text: "As a Tachelhit and English linguist, translate the following English\
+  \ sentences to Tachelhit \nEnglish: {{sentence_eng_Latn}} \nTachelhit: "
+include: ntrex
+task: ntrex_eng_Latn-shi_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-sna_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-sna_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce8c50f5cacf69e70aca8f451ab8bc1fa8270158
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-sna_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_target: sentence_sna_Latn
+doc_to_text: "As a Shona (Latin) and English linguist, translate the following English\
+  \ sentences to Shona (Latin) \nEnglish: {{sentence_eng_Latn}} \nShona (Latin): "
+include: ntrex
+task: ntrex_eng_Latn-sna_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-som_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-som_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b7f46323401a4c04b1026507b1163111fa71455
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-som_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_target: sentence_som_Latn
+doc_to_text: "As a Somali and English linguist, translate the following English sentences\
+  \ to Somali \nEnglish: {{sentence_eng_Latn}} \nSomali: "
+include: ntrex
+task: ntrex_eng_Latn-som_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ssw_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ssw_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f02e88ca3f7d5abb314ea174fe21c35b48af402
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ssw_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_target: sentence_ssw_Latn
+doc_to_text: "As a Swati and English linguist, translate the following English sentences\
+  \ to Swati \nEnglish: {{sentence_eng_Latn}} \nSwati: "
+include: ntrex
+task: ntrex_eng_Latn-ssw_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-swa_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-swa_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47090821da435d1b9d4caada3e91221cd1eed3b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-swa_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swa_Latn
+doc_to_target: sentence_swa_Latn
+doc_to_text: "As a Swahili and English linguist, translate the following English sentences\
+  \ to Swahili \nEnglish: {{sentence_eng_Latn}} \nSwahili: "
+include: ntrex
+task: ntrex_eng_Latn-swa_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tam_Taml.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tam_Taml.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78d61866bd42b467246479946dfa342a6e7835ff
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tam_Taml.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tam_Taml
+doc_to_target: sentence_tam_Taml
+doc_to_text: "As a Tamil and English linguist, translate the following English sentences\
+  \ to Tamil \nEnglish: {{sentence_eng_Latn}} \nTamil: "
+include: ntrex
+task: ntrex_eng_Latn-tam_Taml_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tel_Telu.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tel_Telu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..829635313dd6f5cbc1d08c31a52732aef1513e19
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tel_Telu.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tel_Telu
+doc_to_target: sentence_tel_Telu
+doc_to_text: "As a Telugu and English linguist, translate the following English sentences\
+  \ to Telugu \nEnglish: {{sentence_eng_Latn}} \nTelugu: "
+include: ntrex
+task: ntrex_eng_Latn-tel_Telu_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tir_Ethi.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tir_Ethi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1f27f4389cb2b2be56b02f2427a8d8df222aed17
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tir_Ethi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_target: sentence_tir_Ethi
+doc_to_text: "As a Tigrinya and English linguist, translate the following English\
+  \ sentences to Tigrinya \nEnglish: {{sentence_eng_Latn}} \nTigrinya: "
+include: ntrex
+task: ntrex_eng_Latn-tir_Ethi_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ton_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ton_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3ffeb74fbb04205e0bb1b27d0ec855252688f6e5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ton_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ton_Latn
+doc_to_target: sentence_ton_Latn
+doc_to_text: "As a Tongan and English linguist, translate the following English sentences\
+  \ to Tongan \nEnglish: {{sentence_eng_Latn}} \nTongan: "
+include: ntrex
+task: ntrex_eng_Latn-ton_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tsn_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tsn_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed11f2cba88a703b44ce2a077f761b4cd98135c0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-tsn_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tsn_Latn
+doc_to_target: sentence_tsn_Latn
+doc_to_text: "As a Tswana and English linguist, translate the following English sentences\
+  \ to Tswana \nEnglish: {{sentence_eng_Latn}} \nTswana: "
+include: ntrex
+task: ntrex_eng_Latn-tsn_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-urd_Arab.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-urd_Arab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a05e951bef2f38005b9d7fb3133bdf811f69c565
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-urd_Arab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: urd_Arab
+doc_to_target: sentence_urd_Arab
+doc_to_text: "As a Urdu and English linguist, translate the following English sentences\
+  \ to Urdu \nEnglish: {{sentence_eng_Latn}} \nUrdu: "
+include: ntrex
+task: ntrex_eng_Latn-urd_Arab_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ven_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ven_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4345201694bc0c8a9f9bda487a9ecfb36982c8bf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-ven_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ven_Latn
+doc_to_target: sentence_ven_Latn
+doc_to_text: "As a Venda and English linguist, translate the following English sentences\
+  \ to Venda \nEnglish: {{sentence_eng_Latn}} \nVenda: "
+include: ntrex
+task: ntrex_eng_Latn-ven_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-wol_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-wol_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..48abbb33f870ec3305f8337e62b131bbd38683fb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-wol_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_target: sentence_wol_Latn
+doc_to_text: "As a Wolof and English linguist, translate the following English sentences\
+  \ to Wolof \nEnglish: {{sentence_eng_Latn}} \nWolof: "
+include: ntrex
+task: ntrex_eng_Latn-wol_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-xho_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-xho_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1071a5fb2faad74df4e2f357f412923162b0044
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-xho_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_target: sentence_xho_Latn
+doc_to_text: "As a Xhosa and English linguist, translate the following English sentences\
+  \ to Xhosa \nEnglish: {{sentence_eng_Latn}} \nXhosa: "
+include: ntrex
+task: ntrex_eng_Latn-xho_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-yor_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-yor_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..43c1be35ee76adf853e6429e4bb06fea867ce5d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-yor_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_target: sentence_yor_Latn
+doc_to_text: "As a Yoruba and English linguist, translate the following English sentences\
+  \ to Yoruba \nEnglish: {{sentence_eng_Latn}} \nYoruba: "
+include: ntrex
+task: ntrex_eng_Latn-yor_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-zul_Latn.yaml b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-zul_Latn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10e890a9b3cbffdbb2205d091d91fa42eae880b1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/ntrex/prompt_3/english-african/ntrex_eng_Latn-zul_Latn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_target: sentence_zul_Latn
+doc_to_text: "As a Zulu and English linguist, translate the following English sentences\
+  \ to Zulu \nEnglish: {{sentence_eng_Latn}} \nZulu: "
+include: ntrex
+task: ntrex_eng_Latn-zul_Latn_prompt_3
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/README.md b/lm_eval/tasks/afrobench/openai_mmlu/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe980e87464b07c91d2c766254c760d772d65c36
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/README.md
@@ -0,0 +1,25 @@
+# Multilingual Massive Multitask Language Understanding (MMMLU)
+
+## Paper
+Title: `Multilingual Massive Multitask Language Understanding (MMMLU)`
+
+Paper Link: https://arxiv.org/abs/2009.03300
+
+## Abstract
+>We propose a new test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more. To attain high accuracy on this test, models must possess extensive world knowledge and problem solving ability. We find that while most recent models have near random-chance accuracy, the very largest GPT-3 model improves over random chance by almost 20 percentage points on average. However, on every one of the 57 tasks, the best models still need substantial improvements before they can reach expert-level accuracy. Models also have lopsided performance and frequently do not know when they are wrong. Worse, they still have near-random accuracy on some socially important subjects such as morality and law. By comprehensively evaluating the breadth and depth of a model's academic and professional understanding, our test can be used to analyze models across many tasks and to identify important shortcomings.
+
+HomePage: https://huggingface.co/datasets/openai/MMMLU
+
+### Citation
+
+```
+@misc{hendrycks2021measuringmassivemultitasklanguage,
+      title={Measuring Massive Multitask Language Understanding},
+      author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
+      year={2021},
+      eprint={2009.03300},
+      archivePrefix={arXiv},
+      primaryClass={cs.CY},
+      url={https://arxiv.org/abs/2009.03300},
+}
+```
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/openai_mmlu.yaml b/lm_eval/tasks/afrobench/openai_mmlu/openai_mmlu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..541eb43cfdd783b15cad4123437c2dffcf1cc794
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/openai_mmlu.yaml
@@ -0,0 +1,13 @@
+group: openai_mmlu
+task:
+  - openai_mmlu_prompt_1
+  - openai_mmlu_prompt_2
+  - openai_mmlu_prompt_3
+  - openai_mmlu_prompt_4
+  - openai_mmlu_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu
new file mode 100644
index 0000000000000000000000000000000000000000..ce4f02eeda277404713974f4699c716b454514f5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu
@@ -0,0 +1,22 @@
+tag:
+    - openai_mmlu_tasks
+    - openai_mmlu_prompt_1
+    - afrobench_mmlu_tasks
+dataset_path: openai/MMMLU
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{Question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_ara.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_ara.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c9b86fc1d1c5d8185692c48bc85d991714dbff5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_ara.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: AR_XY
+doc_to_text: 'Q: {{Question.strip()}}
+
+  A: {{A}}
+
+  B: {{B}}
+
+  C: {{C}}
+
+  D: {{D}}
+
+  Please choose the correct answer from the options above:'
+include: openai_mmlu
+task: openai_mmlu_ara_prompt_1
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_swa.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1a3661d45235258f0c0cb1a6bb21119de326ef7f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: SW_KE
+doc_to_text: 'Q: {{Question.strip()}}
+
+  A: {{A}}
+
+  B: {{B}}
+
+  C: {{C}}
+
+  D: {{D}}
+
+  Please choose the correct answer from the options above:'
+include: openai_mmlu
+task: openai_mmlu_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_yor.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4124252bfc0b549160ac802f18c44004792d3bf2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_1/openai_mmlu_yor.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: YO_NG
+doc_to_text: 'Q: {{Question.strip()}}
+
+  A: {{A}}
+
+  B: {{B}}
+
+  C: {{C}}
+
+  D: {{D}}
+
+  Please choose the correct answer from the options above:'
+include: openai_mmlu
+task: openai_mmlu_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu
new file mode 100644
index 0000000000000000000000000000000000000000..9f39b0a9d7423b4d5638f23f294b636240570281
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu
@@ -0,0 +1,22 @@
+tag:
+    - openai_mmlu_tasks
+    - openai_mmlu_prompt_2
+    - afrobench_mmlu_tasks
+dataset_path: openai/MMMLU
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{Question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_ara.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_ara.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..550834257a69f7054ae397a403c6dc00d15c8888
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_ara.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: AR_XY
+doc_to_text: 'Question: {{Question.strip()}}
+
+  1: {{A}}
+
+  2: {{B}}
+
+  3: {{C}}
+
+  4: {{D}}
+
+  Please select the correct answer from the given choices:'
+include: openai_mmlu
+task: openai_mmlu_ara_prompt_2
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_swa.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b3025fd726ab59f48ee90bb65b294580a6cfc3c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: SW_KE
+doc_to_text: 'Question: {{Question.strip()}}
+
+  1: {{A}}
+
+  2: {{B}}
+
+  3: {{C}}
+
+  4: {{D}}
+
+  Please select the correct answer from the given choices:'
+include: openai_mmlu
+task: openai_mmlu_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_yor.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..145b237ef50234278732605b1e3936bfccb9968a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_2/openai_mmlu_yor.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: YO_NG
+doc_to_text: 'Question: {{Question.strip()}}
+
+  1: {{A}}
+
+  2: {{B}}
+
+  3: {{C}}
+
+  4: {{D}}
+
+  Please select the correct answer from the given choices:'
+include: openai_mmlu
+task: openai_mmlu_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu
new file mode 100644
index 0000000000000000000000000000000000000000..95456656739a2490a3e11037e7d9f67f72d60962
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu
@@ -0,0 +1,23 @@
+tag:
+    - openai_mmlu_tasks
+    - openai_mmlu_prompt_3
+    - afrobench_mmlu_tasks
+dataset_path: openai/MMMLU
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{Question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_ara.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_ara.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..012192ceee6638f197db8ea8b9210e1529b6b92d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_ara.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: AR_XY
+doc_to_text: 'Input Question: {{Question.strip()}}
+
+  Option A: {{A}}
+
+  Option B: {{B}}
+
+  Option C: {{C}}
+
+  Option D: {{D}}
+
+  Please indicate the correct option from the list above:'
+include: openai_mmlu
+task: openai_mmlu_ara_prompt_3
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_swa.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..431bdb345178bf44b12ea01507cc805cd000113f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_swa.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: SW_KE
+doc_to_text: 'Input Question: {{Question.strip()}}
+
+  Option A: {{A}}
+
+  Option B: {{B}}
+
+  Option C: {{C}}
+
+  Option D: {{D}}
+
+  Please indicate the correct option from the list above:'
+include: openai_mmlu
+task: openai_mmlu_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_yor.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..814fe380267e57da691f727198f2828042aa54c0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_3/openai_mmlu_yor.yaml
@@ -0,0 +1,15 @@
+# Generated by utils.py
+dataset_name: YO_NG
+doc_to_text: 'Input Question: {{Question.strip()}}
+
+  Option A: {{A}}
+
+  Option B: {{B}}
+
+  Option C: {{C}}
+
+  Option D: {{D}}
+
+  Please indicate the correct option from the list above:'
+include: openai_mmlu
+task: openai_mmlu_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu
new file mode 100644
index 0000000000000000000000000000000000000000..37a5949f93795737f8f61a06fc2824ebb671dbe2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu
@@ -0,0 +1,23 @@
+tag:
+    - openai_mmlu_tasks
+    - openai_mmlu_prompt_4
+    - afrobench_mmlu_tasks
+dataset_path: openai/MMMLU
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{Question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_ara.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_ara.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..793eb7441ce36573953525c3c97e60daffb10b02
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_ara.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: AR_XY
+doc_to_text: 'Critically analyze the question and select the most probable answer
+  from the list:
+
+  {{Question.strip()}}
+
+  Choices:
+
+  A) {{A}}
+
+  B) {{B}}
+
+  C) {{C}}
+
+  D) {{D}}'
+include: openai_mmlu
+task: openai_mmlu_ara_prompt_4
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_swa.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..095dd7ff6d04db581bc070eff001d48600014e0e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_swa.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: SW_KE
+doc_to_text: 'Critically analyze the question and select the most probable answer
+  from the list:
+
+  {{Question.strip()}}
+
+  Choices:
+
+  A) {{A}}
+
+  B) {{B}}
+
+  C) {{C}}
+
+  D) {{D}}'
+include: openai_mmlu
+task: openai_mmlu_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_yor.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd0a9daa1ed5a2ae9882225aefdfe5e653dffcc6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_4/openai_mmlu_yor.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: YO_NG
+doc_to_text: 'Critically analyze the question and select the most probable answer
+  from the list:
+
+  {{Question.strip()}}
+
+  Choices:
+
+  A) {{A}}
+
+  B) {{B}}
+
+  C) {{C}}
+
+  D) {{D}}'
+include: openai_mmlu
+task: openai_mmlu_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu
new file mode 100644
index 0000000000000000000000000000000000000000..77183eb04c0567b83f87bfd17bbdd18bf003f7dd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu
@@ -0,0 +1,23 @@
+tag:
+    - openai_mmlu_tasks
+    - openai_mmlu_prompt_5
+    - afrobench_mmlu_tasks
+dataset_path: openai/MMMLU
+dataset_name: null
+output_type: multiple_choice
+test_split: test
+fewshot_config:
+  sampler: first_n
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(Answer.strip())}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{Question}}"
+doc_to_choice: ["A", "B", "C", "D"]
+metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+  - metric: acc_norm
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_ara.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_ara.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50a6e74ff2198b326716b99a7430102d8aaf0221
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_ara.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: AR_XY
+doc_to_text: 'Answer the question and pick the correct answer from the options: {{Question.strip()}}
+
+  Options:
+
+  A. {{A}}
+
+  B. {{B}}
+
+  C. {{C}}
+
+  D. {{D}}
+
+  Please choose the correct option from the above list:'
+include: openai_mmlu
+task: openai_mmlu_ara_prompt_5
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_swa.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0cc19860cc5f7bc90d499a1ead811a549170eb6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_swa.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: SW_KE
+doc_to_text: 'Answer the question and pick the correct answer from the options: {{Question.strip()}}
+
+  Options:
+
+  A. {{A}}
+
+  B. {{B}}
+
+  C. {{C}}
+
+  D. {{D}}
+
+  Please choose the correct option from the above list:'
+include: openai_mmlu
+task: openai_mmlu_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_yor.yaml b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..691657ef46974107e46c37291fb1efa66364a5b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/prompt_5/openai_mmlu_yor.yaml
@@ -0,0 +1,17 @@
+# Generated by utils.py
+dataset_name: YO_NG
+doc_to_text: 'Answer the question and pick the correct answer from the options: {{Question.strip()}}
+
+  Options:
+
+  A. {{A}}
+
+  B. {{B}}
+
+  C. {{C}}
+
+  D. {{D}}
+
+  Please choose the correct option from the above list:'
+include: openai_mmlu
+task: openai_mmlu_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/openai_mmlu/utils.py b/lm_eval/tasks/afrobench/openai_mmlu/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fc0fea958c32b2b8d104f586045564c04de8c86
--- /dev/null
+++ b/lm_eval/tasks/afrobench/openai_mmlu/utils.py
@@ -0,0 +1,99 @@
+import argparse
+import os
+
+import yaml
+
+
+def prompt_func(mode, lang):
+    """Return the Jinja prompt template registered under ``mode``.
+
+    :param mode: Template key, one of ``prompt_1`` .. ``prompt_5``.
+    :param lang: Language name; accepted for call-site symmetry but not used,
+        since every template here is language-independent.
+    :raises KeyError: If ``mode`` is not a known template key.
+    """
+    # ``{{...}}`` placeholders are left verbatim; they are written into the
+    # generated YAML as-is rather than being filled in here.
+    templates = {
+        "prompt_1": "Q: {{Question.strip()}}\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nPlease choose the correct answer from the options above:",
+        "prompt_2": "Question: {{Question.strip()}}\n1: {{A}}\n2: {{B}}\n3: {{C}}\n4: {{D}}\nPlease select the correct answer from the given choices:",
+        "prompt_3": "Input Question: {{Question.strip()}}\nOption A: {{A}}\nOption B: {{B}}\nOption C: {{C}}\nOption D: {{D}}\nPlease indicate the correct option from the list above:",
+        "prompt_4": "Critically analyze the question and select the most probable answer from the list:\n{{Question.strip()}}\nChoices:\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}",
+        "prompt_5": "Answer the question and pick the correct answer from the options: {{Question.strip()}}\nOptions:\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nPlease choose the correct option from the above list:",
+    }
+    return templates[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Write ``<output_dir>/<mode>/openai_mmlu_<lang>.yaml`` for each language.
+
+    Every language is attempted; files that could not be created because they
+    already exist are collected and reported together at the end.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key passed to ``prompt_func``; also used as the name
+        of the subdirectory and as the task-name suffix.
+    :raises FileExistsError: After the loop, if any file was skipped because
+        it already existed.
+    """
+    languages = {
+        "ara": "Arabic",
+        "swa": "Swahili",
+        "yor": "Yoruba",
+    }
+    # Dataset configuration name for each language code.
+    lang2_code = {
+        "ara": "AR_XY",
+        "swa": "SW_KE",
+        "yor": "YO_NG",
+    }
+    # Mode "x" makes open() raise FileExistsError instead of truncating.
+    open_mode = "w" if overwrite else "x"
+    existing = []
+
+    for lang, lang_name in languages.items():
+        try:
+            file_name = f"openai_mmlu_{lang}.yaml"
+            yaml_details = {
+                "include": "openai_mmlu",
+                "task": f"openai_mmlu_{lang}_{mode}",
+                "dataset_name": lang2_code[lang],
+                "doc_to_text": prompt_func(mode, lang_name),
+            }
+            os.makedirs(os.path.join(output_dir, mode), exist_ok=True)
+            target_path = f"{output_dir}/{mode}/{file_name}"
+            with open(target_path, open_mode, encoding="utf8") as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            existing.append(file_name)
+
+    if existing:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(existing)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files.
+
+    Command-line options:
+
+    * ``--overwrite``: replace files that already exist. Off unless given, in
+      which case existing files make ``gen_lang_yamls`` raise
+      ``FileExistsError``.
+    * ``--output-dir``: directory to write into (default ``./``).
+    * ``--mode``: prompt template to emit, ``prompt_1`` .. ``prompt_5``
+      (default ``prompt_1``).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        # A ``store_true`` flag has to default to False: with ``default=True``
+        # passing the flag changed nothing and files were always overwritten.
+        default=False,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/salt/README.md b/lm_eval/tasks/afrobench/salt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3c5239a05e88cbfbadf6670f96d6ed621b0d805c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/README.md
@@ -0,0 +1,17 @@
+# SALT
+
+## Paper
+Title: `Sunbird African Language Technology (SALT) dataset`
+
+Paper Link: https://aclanthology.org/2023.emnlp-main.862/
+
+## Abstract
+>SALT is a multi-way parallel text and speech corpus of English and six languages widely spoken in Uganda and East Africa: Luganda, Lugbara, Acholi, Runyankole, Ateso and Swahili. The core of the dataset is a set of 25,000 sentences covering a range of topics of local relevance, such as agriculture, health and society. Each sentence is translated into all languages, to support machine translation, and speech recordings are made for approximately 5,000 of the sentences both by a variety of speakers in natural settings (suitable for ASR) and by professionals in a studio setting (suitable for text-to-speech).
+
+HomePage: https://github.com/SunbirdAI/salt
+
+### Publications
+
+Multilingual Model and Data Resources for Text-To-Speech in Ugandan Languages. Isaac Owomugisha, Benjamin Akera, Ernest Tonny Mwebaze, John Quinn. 4th Workshop on African Natural Language Processing, 2023. [pdf](https://openreview.net/pdf?id=vaxG0WAPzL)
+
+Machine Translation For African Languages: Community Creation Of Datasets And Models In Uganda. Benjamin Akera, Jonathan Mukiibi, Lydia Sanyu Naggayi, Claire Babirye, Isaac Owomugisha, Solomon Nsumba, Joyce Nakatumba-Nabende, Engineer Bainomugisha, Ernest Mwebaze, John Quinn. 3rd Workshop on African Natural Language Processing, 2022. [pdf](https://openreview.net/pdf?id=BK-z5qzEU-9)
diff --git a/lm_eval/tasks/afrobench/salt/gen_utils.py b/lm_eval/tasks/afrobench/salt/gen_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ac703a0d5d0912d38fb624dbba967ed3ffdb734
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/gen_utils.py
@@ -0,0 +1,149 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:
+    """Plain holder that keeps the object it was built with on ``.value``.
+
+    Nothing else in this script refers to the class.
+    """
+
+    def __init__(self, value):
+        # Store the argument unchanged; no validation or copying is done.
+        self.value = value
+
+
+def prompt_func(mode, lang, lang_dict):
+    """Build the translation prompt template for one language and prompt key.
+
+    :param mode: Template key: ``prompt_1`` .. ``prompt_3`` for
+        ``<lang>`` -> English, or the same key with a ``_reverse`` suffix for
+        English -> ``<lang>``.
+    :param lang: Language code; the source column is ``<lang>_text``.
+    :param lang_dict: Mapping from language code to display name.
+    :raises KeyError: If ``lang`` is missing from ``lang_dict`` or ``mode`` is
+        not a known template key.
+    """
+    name = lang_dict[lang]
+    # Quadrupled braces yield a literal ``{{<lang>_text}}`` Jinja placeholder.
+    lang_line = f"{name} sentence: {{{{{lang}_text}}}}"
+    eng_line = "English sentence: {{eng_source_text}} "
+    # Shared tail of every English -> <lang> prompt.
+    reverse_tail = f"{eng_line}\n{name} sentence: "
+
+    templates = {
+        "prompt_1": f"{lang_line} \nEnglish sentence: ",
+        "prompt_1_reverse": reverse_tail,
+        "prompt_2": (
+            f"You are a translation expert. Translate the following {name} sentences to English \n"
+            f"{lang_line}\nEnglish sentence: "
+        ),
+        "prompt_2_reverse": (
+            f"You are a translation expert. Translate the following English sentences to {name} \n"
+            f"{reverse_tail}"
+        ),
+        "prompt_3": (
+            f"As a {name} and English linguist, translate the following {name} sentences to English. \n"
+            f"{lang_line}\nEnglish sentence: "
+        ),
+        "prompt_3_reverse": (
+            f"As a {name} and English linguist, translate the following English sentences to {name}. \n"
+            f"{reverse_tail}"
+        ),
+    }
+    return templates[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse: bool) -> None:
+    """
+    Write one translation-task YAML per non-English language.
+
+    Files go to ``<output_dir>/<mode>/`` and are named ``salt_<lang>-eng.yaml``
+    (or ``salt_eng-<lang>.yaml`` when ``reverse`` is truthy). Every language is
+    attempted; files skipped because they already exist are reported together
+    at the end.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt key (``prompt_1`` .. ``prompt_3``); also the name of
+        the subdirectory and the task-name suffix.
+    :param reverse: If truthy, emit English -> language tasks using the
+        ``<mode>_reverse`` template; otherwise language -> English tasks.
+    :raises FileExistsError: After the loop, if any file was skipped because
+        it already existed.
+    """
+    languages = {
+        "eng": "English",
+        "lug": "Luganda",
+        "ach": "Acholi",
+        "lgg": "Lugbara",
+        "teo": "Ateso",
+        "nyn": "Runyankole",
+        "swa": "Swahili",
+        "ibo": "Igbo",
+    }
+    # Mode "x" makes open() raise FileExistsError instead of truncating.
+    open_mode = "w" if overwrite else "x"
+    existing = []
+
+    for lang in languages:
+        if lang == "eng":
+            # English is always the other half of the pair, never a task itself.
+            continue
+        if reverse:
+            pair, target, prompt_key = f"eng-{lang}", f"{lang}_text", f"{mode}_reverse"
+        else:
+            pair, target, prompt_key = f"{lang}-eng", "eng_target_text", mode
+        file_name = f"salt_{pair}.yaml"
+        try:
+            yaml_details = {
+                "include": "salt",
+                "task": f"salt_{pair}_{mode}",
+                "dataset_name": "text-all",
+                "doc_to_target": target,
+                "doc_to_text": prompt_func(prompt_key, lang, languages),
+            }
+            os.makedirs(f"{output_dir}/{mode}", exist_ok=True)
+            target_path = f"{output_dir}/{mode}/{file_name}"
+            with open(target_path, open_mode, encoding="utf8") as f:
+                # Header text is kept as "utils.py" so regenerated files match
+                # the ones already checked in.
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            existing.append(file_name)
+
+    if existing:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(existing)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files.
+
+    Command-line options:
+
+    * ``--overwrite``: replace files that already exist. Off unless given, in
+      which case existing files make ``gen_lang_yamls`` raise
+      ``FileExistsError``.
+    * ``--output-dir``: directory to write into (default ``./``).
+    * ``--mode``: prompt template to emit, ``prompt_1`` .. ``prompt_3``
+      (default ``prompt_1``).
+    * ``--reverse True|False``: ``True`` (the default) emits English ->
+      language tasks, ``False`` emits language -> English tasks.
+    """
+
+    def parse_bool(text: str) -> bool:
+        """Turn a ``True``/``False`` command-line token into a real bool."""
+        lowered = text.strip().lower()
+        if lowered in ("true", "1", "yes"):
+            return True
+        if lowered in ("false", "0", "no"):
+            return False
+        raise argparse.ArgumentTypeError(f"expected True or False, got {text!r}")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        # A ``store_true`` flag has to default to False: with ``default=True``
+        # passing the flag changed nothing and files were always overwritten.
+        default=False,
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_1",
+        choices=["prompt_1", "prompt_2", "prompt_3"],
+        help="Prompt number",
+    )
+    parser.add_argument(
+        "--reverse",
+        default=True,
+        # argparse checks ``choices`` after ``type`` conversion. Without a
+        # converter the raw string "False" never equals the bool False, so any
+        # explicit value was rejected and the forward direction was unreachable.
+        type=parse_bool,
+        choices=[True, False],
+        help="Reverse the translation direction",
+    )
+    args = parser.parse_args()
+
+    gen_lang_yamls(
+        output_dir=args.output_dir,
+        overwrite=args.overwrite,
+        mode=args.mode,
+        reverse=args.reverse,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt b/lm_eval/tasks/afrobench/salt/prompt_1/salt
new file mode 100644
index 0000000000000000000000000000000000000000..a07d434a8bfb5e4c85abef6fe556e648c6fe5a00
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt
@@ -0,0 +1,24 @@
+tag:
+- salt_tasks
+- salt_prompt_1
+- afrobench_MT_tasks
+dataset_path: Sunbird/salt
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_ach-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_ach-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41731279817637401307fc9f55ecd96cd2a80794
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_ach-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Acholi sentence: {{ach_text}} \nEnglish sentence: "
+include: salt
+task: salt_ach-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ach.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ach.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..219e5780634f4812157ea6d2ad70b7b22e72ae49
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ach.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ach_text
+doc_to_text: "English sentence: {{eng_source_text}} \nAcholi sentence: "
+include: salt
+task: salt_eng-ach_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ibo.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f90220591f5f7047da6d488740c759c850a95b1e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-ibo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ibo_text
+doc_to_text: "English sentence: {{eng_source_text}} \nIgbo sentence: "
+include: salt
+task: salt_eng-ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lgg.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lgg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a038ddb39eb1b171be9e5631e129995ceeed64e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lgg.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lgg_text
+doc_to_text: "English sentence: {{eng_source_text}} \nLugbara sentence: "
+include: salt
+task: salt_eng-lgg_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lug.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4539913786124aec4ea68f16538989a91131ca44
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-lug.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lug_text
+doc_to_text: "English sentence: {{eng_source_text}} \nLuganda sentence: "
+include: salt
+task: salt_eng-lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-nyn.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-nyn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..448e1101d681d4f31bde8c81418d4f2f64b6eb13
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-nyn.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: nyn_text
+doc_to_text: "English sentence: {{eng_source_text}} \nRunyankole sentence: "
+include: salt
+task: salt_eng-nyn_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-swa.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..792b4840c2551627b66008fdd2c172e3660cc914
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-swa.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: swa_text
+doc_to_text: "English sentence: {{eng_source_text}} \nSwahili sentence: "
+include: salt
+task: salt_eng-swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-teo.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-teo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..810626c6a5ddf8525d45344b5a5eb7a2d65ab34e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_eng-teo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: teo_text
+doc_to_text: "English sentence: {{eng_source_text}} \nAteso sentence: "
+include: salt
+task: salt_eng-teo_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_ibo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_ibo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0a98c8648081bc8c3e1fd1c897c41212701f36fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_ibo-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Igbo sentence: {{ibo_text}} \nEnglish sentence: "
+include: salt
+task: salt_ibo-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_lgg-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_lgg-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8e281ac3189dbaada8d21fbd4896a0c8478dbc6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_lgg-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Lugbara sentence: {{lgg_text}} \nEnglish sentence: "
+include: salt
+task: salt_lgg-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_lug-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_lug-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f924d5c50f82e1dbbf6be1dd4a138d2c5d61c5ac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_lug-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Luganda sentence: {{lug_text}} \nEnglish sentence: "
+include: salt
+task: salt_lug-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_nyn-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_nyn-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bd9363614648969391f20deb49fd2a92afdcfede
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_nyn-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Runyankole sentence: {{nyn_text}} \nEnglish sentence: "
+include: salt
+task: salt_nyn-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_swa-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_swa-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2308593e3d54e222d7543403f214dba76719a80
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_swa-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Swahili sentence: {{swa_text}} \nEnglish sentence: "
+include: salt
+task: salt_swa-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_1/salt_teo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_1/salt_teo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6efb4ea0807a9a66eb84797503b6bc4762777fd0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_1/salt_teo-eng.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "Ateso sentence: {{teo_text}} \nEnglish sentence: "
+include: salt
+task: salt_teo-eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt b/lm_eval/tasks/afrobench/salt/prompt_2/salt
new file mode 100644
index 0000000000000000000000000000000000000000..66355878cbb8354261bd426623d29589ce93383a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt
@@ -0,0 +1,24 @@
+tag:
+- salt_tasks
+- salt_prompt_2
+- afrobench_MT_tasks
+dataset_path: Sunbird/salt
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_ach-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_ach-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dda717b7942cb37c7f6d821070572cd302717639
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_ach-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Acholi sentences\
+  \ to English \nAcholi sentence: {{ach_text}}\nEnglish sentence: "
+include: salt
+task: salt_ach-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ach.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ach.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e4a72a5116a41a7d7950cfed80cbd826a37a0dc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ach.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ach_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Acholi \nEnglish sentence: {{eng_source_text}} \nAcholi sentence: "
+include: salt
+task: salt_eng-ach_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ibo.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04649c1287e599a2ecdf376b4b30bc86700dcaca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ibo_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Igbo \nEnglish sentence: {{eng_source_text}} \nIgbo sentence: "
+include: salt
+task: salt_eng-ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lgg.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lgg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ac6becbcb7b10890cb1b2cd56dbe43c23742683
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lgg.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lgg_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Lugbara \nEnglish sentence: {{eng_source_text}} \nLugbara sentence: "
+include: salt
+task: salt_eng-lgg_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lug.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b5f6399cf6ddc5276fb48545e4ad3d1e0e4ab1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lug_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Luganda \nEnglish sentence: {{eng_source_text}} \nLuganda sentence: "
+include: salt
+task: salt_eng-lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-nyn.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-nyn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..84452d5aed07b2fe13d6836a7656ff85dfa2ae8e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-nyn.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: nyn_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Runyankole \nEnglish sentence: {{eng_source_text}} \nRunyankole sentence: "
+include: salt
+task: salt_eng-nyn_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-swa.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..523db9fb7e913dff30b80d28ca13b8c613653ad6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: swa_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Swahili \nEnglish sentence: {{eng_source_text}} \nSwahili sentence: "
+include: salt
+task: salt_eng-swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-teo.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-teo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..000e8d043bb1897c5647480d6584191181b45c68
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_eng-teo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: teo_text
+doc_to_text: "You are a translation expert. Translate the following English sentences\
+  \ to Ateso \nEnglish sentence: {{eng_source_text}} \nAteso sentence: "
+include: salt
+task: salt_eng-teo_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_ibo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_ibo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b4ec6601af313b25606c05752715a3dfadf1476e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_ibo-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Igbo sentences\
+  \ to English \nIgbo sentence: {{ibo_text}}\nEnglish sentence: "
+include: salt
+task: salt_ibo-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_lgg-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_lgg-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d802c0faa99f895605e00c25aea3197b0fad7d2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_lgg-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Lugbara sentences\
+  \ to English \nLugbara sentence: {{lgg_text}}\nEnglish sentence: "
+include: salt
+task: salt_lgg-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_lug-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_lug-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..521bbf15c008670a0d71b671be84e58b9ca7290b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_lug-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Luganda sentences\
+  \ to English \nLuganda sentence: {{lug_text}}\nEnglish sentence: "
+include: salt
+task: salt_lug-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_nyn-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_nyn-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cc4abfc26505ada2abb05775a6b4b43c67fb139
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_nyn-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Runyankole sentences\
+  \ to English \nRunyankole sentence: {{nyn_text}}\nEnglish sentence: "
+include: salt
+task: salt_nyn-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_swa-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_swa-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e80b9087df91df24f619c61c61c553decfcb1bf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_swa-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Swahili sentences\
+  \ to English \nSwahili sentence: {{swa_text}}\nEnglish sentence: "
+include: salt
+task: salt_swa-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_2/salt_teo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_2/salt_teo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0b0d516de9ae00758cd9fccb45c84d65eb069bc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_2/salt_teo-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "You are a translation expert. Translate the following Ateso sentences\
+  \ to English \nAteso sentence: {{teo_text}}\nEnglish sentence: "
+include: salt
+task: salt_teo-eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt b/lm_eval/tasks/afrobench/salt/prompt_3/salt
new file mode 100644
index 0000000000000000000000000000000000000000..51dac9c53b42569b2b5c7f19a5b9fa6b83fc68e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt
@@ -0,0 +1,24 @@
+tag:
+- salt_tasks
+- salt_prompt_3
+- afrobench_MT_tasks
+dataset_path: Sunbird/salt
+dataset_kwargs: {trust_remote_code: True}
+output_type: generate_until
+validation_split: dev
+fewshot_split: dev
+test_split: test
+metric_list:
+  - metric: bleu
+    aggregation: bleu
+    higher_is_better: true
+  - metric: chrf
+    aggregation: chrf
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "\n"
+  do_sample: false
+  temperature: 0.0
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_ach-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_ach-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c198a59f843447475f221823361f7ddf919419c3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_ach-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Acholi and English linguist, translate the following Acholi sentences\
+  \ to English. \nAcholi sentence: {{ach_text}}\nEnglish sentence: "
+include: salt
+task: salt_ach-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ach.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ach.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..636a77d8606343d9de230547b958f8e49b448b5c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ach.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ach_text
+doc_to_text: "As a Acholi and English linguist, translate the following English sentences\
+  \ to Acholi. \nEnglish sentence: {{eng_source_text}} \nAcholi sentence: "
+include: salt
+task: salt_eng-ach_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ibo.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44d015d6ca9db85477a082c687a46c7e46276068
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: ibo_text
+doc_to_text: "As a Igbo and English linguist, translate the following English sentences\
+  \ to Igbo. \nEnglish sentence: {{eng_source_text}} \nIgbo sentence: "
+include: salt
+task: salt_eng-ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lgg.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lgg.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f1e6f43ba7783c2b521ee3a0caeec1d0904790e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lgg.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lgg_text
+doc_to_text: "As a Lugbara and English linguist, translate the following English sentences\
+  \ to Lugbara. \nEnglish sentence: {{eng_source_text}} \nLugbara sentence: "
+include: salt
+task: salt_eng-lgg_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lug.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2065c30df12a680ca08b218ce3e842324313da4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: lug_text
+doc_to_text: "As a Luganda and English linguist, translate the following English sentences\
+  \ to Luganda. \nEnglish sentence: {{eng_source_text}} \nLuganda sentence: "
+include: salt
+task: salt_eng-lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-nyn.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-nyn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e48970a8ccb136d4328598224c370076949954b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-nyn.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: nyn_text
+doc_to_text: "As a Runyankole and English linguist, translate the following English\
+  \ sentences to Runyankole. \nEnglish sentence: {{eng_source_text}} \nRunyankole\
+  \ sentence: "
+include: salt
+task: salt_eng-nyn_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-swa.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cfd3f8eadb1ca0ff898595c897a3eebde72f08a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: swa_text
+doc_to_text: "As a Swahili and English linguist, translate the following English sentences\
+  \ to Swahili. \nEnglish sentence: {{eng_source_text}} \nSwahili sentence: "
+include: salt
+task: salt_eng-swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-teo.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-teo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8d280bb41808f8af50287861fa1131b92295e70
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_eng-teo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: teo_text
+doc_to_text: "As a Ateso and English linguist, translate the following English sentences\
+  \ to Ateso. \nEnglish sentence: {{eng_source_text}} \nAteso sentence: "
+include: salt
+task: salt_eng-teo_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_ibo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_ibo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..13be699cb1dc1255939321205d25921625cdb140
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_ibo-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Igbo and English linguist, translate the following Igbo sentences\
+  \ to English. \nIgbo sentence: {{ibo_text}}\nEnglish sentence: "
+include: salt
+task: salt_ibo-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_lgg-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_lgg-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7aa4ffc442c41ea9abd148257b1e49524173eca5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_lgg-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Lugbara and English linguist, translate the following Lugbara sentences\
+  \ to English. \nLugbara sentence: {{lgg_text}}\nEnglish sentence: "
+include: salt
+task: salt_lgg-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_lug-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_lug-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da505f6d7589d9a7bba4ea7be1c73134fc562a20
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_lug-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Luganda and English linguist, translate the following Luganda sentences\
+  \ to English. \nLuganda sentence: {{lug_text}}\nEnglish sentence: "
+include: salt
+task: salt_lug-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_nyn-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_nyn-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9edba7c495369e1849106e854100a65d0bda9ee5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_nyn-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Runyankole and English linguist, translate the following Runyankole\
+  \ sentences to English. \nRunyankole sentence: {{nyn_text}}\nEnglish sentence: "
+include: salt
+task: salt_nyn-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_swa-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_swa-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d01c9170c602c7eebdc3b0a5c216d5bdd4bc52a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_swa-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Swahili and English linguist, translate the following Swahili sentences\
+  \ to English. \nSwahili sentence: {{swa_text}}\nEnglish sentence: "
+include: salt
+task: salt_swa-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/prompt_3/salt_teo-eng.yaml b/lm_eval/tasks/afrobench/salt/prompt_3/salt_teo-eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c81336cac58f12d6dd2118315a6cdb64a913a2af
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/prompt_3/salt_teo-eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: text-all
+doc_to_target: eng_target_text
+doc_to_text: "As a Ateso and English linguist, translate the following Ateso sentences\
+  \ to English. \nAteso sentence: {{teo_text}}\nEnglish sentence: "
+include: salt
+task: salt_teo-eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/salt/salt.yaml b/lm_eval/tasks/afrobench/salt/salt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..edd3070d8ba2c24b651038ca7408a38b45e00da3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/salt/salt.yaml
@@ -0,0 +1,11 @@
+group: salt
+task:
+  - salt_prompt_1
+  - salt_prompt_2
+  - salt_prompt_3
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench.sh b/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench.sh
new file mode 100644
index 0000000000000000000000000000000000000000..886c94956cc8204ce9fda69e912cec91424a3d92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Run lm_eval on the `afrobench` tasks once for every model in model_names.
+batch_size=5
+num_fewshot=0
+
+export CUDA_VISIBLE_DEVICES=0,1
+# Bash array elements are separated by whitespace; a trailing comma would become part of the name.
+model_names=(
+  "google/gemma-1.1-7b-it"
+  "google/gemma-2-9b-it"
+  "google/gemma-2-27b-it"
+  "Jacaranda/AfroLlama_V1"
+  "LLaMAX/LLaMAX3-8B-Alpaca"
+  "meta-llama/Llama-2-7b-chat-hf"
+  "meta-llama/Llama-3.1-8B-Instruct"
+  "meta-llama/Llama-3.1-70B-Instruct"
+  "meta-llama/Meta-Llama-3-8B-Instruct"
+  "CohereForAI/aya-101"
+)
+
+for model_name in "${model_names[@]}"
+do
+    echo "Running model: $model_name"
+    lm_eval --model hf \
+    --model_args pretrained="${model_name}",parallelize=true \
+    --tasks afrobench \
+    --batch_size ${batch_size} \
+    --num_fewshot ${num_fewshot} \
+    --verbosity DEBUG \
+    --output_path 'path_to_results/' \
+    --log_samples
+done
diff --git a/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench_lite.sh b/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench_lite.sh
new file mode 100644
index 0000000000000000000000000000000000000000..89291faadb97fa9267d09be80e81a7b480aabcb5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sample_run_scripts/run_afrobench_lite.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Run lm_eval on the `afrobench_lite` tasks once for every model in model_names.
+batch_size=5
+num_fewshot=0
+
+export CUDA_VISIBLE_DEVICES=0,1
+# Bash array elements are separated by whitespace; a trailing comma would become part of the name.
+model_names=(
+  "google/gemma-1.1-7b-it"
+  "google/gemma-2-9b-it"
+  "google/gemma-2-27b-it"
+  "Jacaranda/AfroLlama_V1"
+  "LLaMAX/LLaMAX3-8B-Alpaca"
+  "meta-llama/Llama-2-7b-chat-hf"
+  "meta-llama/Llama-3.1-8B-Instruct"
+  "meta-llama/Llama-3.1-70B-Instruct"
+  "meta-llama/Meta-Llama-3-8B-Instruct"
+  "CohereForAI/aya-101"
+)
+
+for model_name in "${model_names[@]}"
+do
+    echo "Running model: $model_name"
+    lm_eval --model hf \
+    --model_args pretrained="${model_name}",parallelize=true \
+    --tasks afrobench_lite \
+    --batch_size ${batch_size} \
+    --num_fewshot ${num_fewshot} \
+    --verbosity DEBUG \
+    --output_path 'path_to_results/' \
+    --log_samples
+done
diff --git a/lm_eval/tasks/afrobench/sib/README.md b/lm_eval/tasks/afrobench/sib/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..732db84b0eb6ad373442692b221e7f97e18e112a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/README.md
@@ -0,0 +1,37 @@
+# SIB-200
+
+## Paper
+Title: `SIB-200: A Simple, Inclusive, and Big Evaluation Dataset for Topic Classification in 200+ Languages and Dialects`
+
+Paper Link: https://aclanthology.org/2024.eacl-long.14/
+
+## Abstract
+>Despite the progress in building multilingual language models, evaluation is often limited to a few languages with available datasets which excludes a large number of low-resource languages. In this paper, we create SIB-200—a large-scale open-sourced benchmark dataset for topic classification in 205 languages and dialects to address the lack of evaluation dataset for Natural Language Understanding (NLU). For many of the languages covered in SIB-200, this is the first publicly available evaluation dataset for NLU. The dataset is based on Flores-200 machine translation corpus. We annotated the English portion of the dataset and extended the sentence-level annotation to the remaining 204 languages covered in the corpus. Despite the simplicity of this task, our evaluation in full-supervised setting, cross-lingual transfer setting and prompting of large language model setting show that there is still a large gap between the performance of high-resource and low-resource languages when multilingual evaluation is scaled to numerous world languages. We found that languages unseen during the pre-training of multilingual language models, languages from under-represented families (like Nilotic and Atlantic-Congo), and languages from the regions of Africa, Americas, Oceania and South East Asia, often have the lowest performance on our topic classification dataset. We hope our dataset will encourage a more inclusive evaluation of multilingual language models on a more diverse set of languages.
+
+HomePage: https://github.com/dadelani/sib-200
+
+### Citation
+
+```
+@inproceedings{adelani-etal-2024-sib,
+    title = "{SIB}-200: A Simple, Inclusive, and Big Evaluation Dataset for Topic Classification in 200+ Languages and Dialects",
+    author = "Adelani, David Ifeoluwa  and
+      Liu, Hannah  and
+      Shen, Xiaoyu  and
+      Vassilyev, Nikita  and
+      Alabi, Jesujoba O.  and
+      Mao, Yanke  and
+      Gao, Haonan  and
+      Lee, En-Shiun Annie",
+    editor = "Graham, Yvette  and
+      Purver, Matthew",
+    booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
+    month = mar,
+    year = "2024",
+    address = "St. Julian{'}s, Malta",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2024.eacl-long.14/",
+    pages = "226--245",
+    abstract = "Despite the progress in building multilingual language models, evaluation is often limited to a few languages with available datasets which excludes a large number of low-resource languages. In this paper, we create SIB-200{---}a large-scale open-sourced benchmark dataset for topic classification in 205 languages and dialects to address the lack of evaluation dataset for Natural Language Understanding (NLU). For many of the languages covered in SIB-200, this is the first publicly available evaluation dataset for NLU. The dataset is based on Flores-200 machine translation corpus. We annotated the English portion of the dataset and extended the sentence-level annotation to the remaining 204 languages covered in the corpus. Despite the simplicity of this task, our evaluation in full-supervised setting, cross-lingual transfer setting and prompting of large language model setting show that there is still a large gap between the performance of high-resource and low-resource languages when multilingual evaluation is scaled to numerous world languages. We found that languages unseen during the pre-training of multilingual language models, languages from under-represented families (like Nilotic and Altantic-Congo), and languages from the regions of Africa, Americas, Oceania and South East Asia, often have the lowest performance on our topic classification dataset. We hope our dataset {\%}will encourages a more inclusive evaluation of multilingual language models on a more diverse set of languages."
+}
+```
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib b/lm_eval/tasks/afrobench/sib/prompt_1/sib
new file mode 100644
index 0000000000000000000000000000000000000000..37fda5d192dc8b4e1aa115d66858876e6bca3bda
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib
@@ -0,0 +1,43 @@
+tag:
+    - sib_tasks
+    - sib_prompt_1
+    - afrobench_TC_tasks
+dataset_path: Davlan/sib200
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: category
+doc_to_choice:
+    - "science/technology"
+    - "travel"
+    - "politics"
+    - "sports"
+    - "health"
+    - "entertainment"
+    - "geography"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_aeb.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_aeb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4116035df2599f79d31293b25abf43191943abd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_aeb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aeb_Arab
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_aeb_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_afr.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..001eee846bd92a3e1703d64d799e5bc8c066f70e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_afr.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_afr_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_aka.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_aka.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..907977dc638bdfc7aba5ea11324d54667cd21d1e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_aka.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aka_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_aka_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_amh.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dde5420724bdb678ac877c5ff895df74ba0b08c6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_amh_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_ary.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68347bd51439c95b88403f843fb78a06a3562d39
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ary.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_ary_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_arz.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c0328134c766bd56637a2097f1b87bfa03a4973
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_arz.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_arz_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_bam.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5469a8a17ea44b468172c326a148f1185a559015
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_bam.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_bam_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_bem.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..01aaa1cbd82342de4ace8c11387f1851a21661d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_bem.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_bem_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_cjk.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_cjk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6deaee753f460189a1fcf47c800239b2242ccf8d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_cjk.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: cjk_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_cjk_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_dik.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_dik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d80d0a080890269475d0133cb4a73cc80ffbe6eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_dik.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dik_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_dik_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_dyu.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_dyu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d72e6321e92d7e8947cce5109d363f7eb51f9de
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_dyu.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dyu_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_dyu_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_eng.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e32469681e4400517131926dff6e8b1a717b69d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_eng_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_ewe.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..60cf7db830a1aff9215989f4af5c9a6f8d278985
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_ewe_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_fon.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ae765522ccd81ddadd2842bb7e8a346fff18088
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fon.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fon_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_fon_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_fra.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4614e6d27f2d41e5558045d933df41a66a909cfb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_fra_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_fuv.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24f1d28a8f088d383bf7fbbff939dc73b4cf447e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_fuv.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_fuv_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_gaz.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df904f957318c08eaf8c2f5cba4d0befa5220fbc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_gaz.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_gaz_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_hau.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b160b8cfc0aa662bfadcc68f2891208e7039c01b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_hau_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_ibo.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e481aeacd5c7d63cbfd11e7efcb3fb1ac738e945
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_ibo_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kab.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a914b01cc54c35941cd769dbe6667ee624421b91
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kab_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kab_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kam.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aaa05108b0cc3313932e71a174b0f53e747e42eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kam.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kam_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kam_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kbp.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kbp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d428490863c573b3a757672bc3c074d8fd548c0d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kbp.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kbp_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kbp_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kea.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e458fb225b8c1b4b4ee2823f46b4b4ad7a6dcad
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kea.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kea_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kik.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..beb94a8edb7cec7b51c960fe319a98e798a84581
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kik.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kik_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kik_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kin.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c16432eba27800bcc8eb927e4a201aac7b3f2e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kin_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kmb.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kmb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c46477e31e4639a9b9c1dca0ce59318534e883e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kmb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kmb_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kmb_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_knc.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_knc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b43157e3642dc91b6f04de06ecc622b67fb036e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_knc.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: knc_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_knc_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_kon.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..def4a77def17ff2d11cc00d6c87962a03c4081cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_kon.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kon_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_kon_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_lin.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbba95e0cf7217c4385f4601a7867ffc6576b2b7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_lin_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_lua.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lua.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4bc665b3f9e6fce1703b2ea53c93bcc52111363
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lua.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lua_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_lua_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_lug.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cbf42e1889e695a379d8261bac27d02fa7f4d33d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_lug_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_luo.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a62ea03c7ba534928d5c3c333d631216cf0dd248
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_luo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_luo_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_mos.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..54140a5d1339758f59a3504d3a4a0a5448414b90
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_mos.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mos_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_mos_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_nso.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f7382d58f3071f1dddcab360ecf06b2cf7a427c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_nso_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_nus.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..28208912f85036d26b494ed495b43f2a57982869
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nus.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nus_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_nus_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_nya.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6ca90a9233e68301406a3303ebbb85cb47da2207
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_nya.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_nya_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_plt.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..650b9a4b711f30b59c0aadde797e941d69a17ed6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_plt.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_plt_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_por.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7901e924a043d74dadbf8b0dabff2303273d03a5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_por.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_por_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_run.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..510fc5c15841c9c130af5cce0e3e2d8499eb71d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_run.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_run_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_sag.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sag.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e7c0bb3148857edab4e8eaef00974fa5e4dfd974
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sag.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sag_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_sag_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_sna.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4115112c393c0dd424b14bdd66046d58e82eb83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_sna_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_som.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..be9c19f1039b8093e3c5bcd7573168b23f6e923c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_som.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_som_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_sot.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78d0e1f50dc0909475131e7892bbe726f5144412
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_sot_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_ssw.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..988f6828cbe84bdf7cec2a03798a452956e1768d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_ssw.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_ssw_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_swa.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4a92192eb750ed34c647381ae0c8655b141f4a2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_swa_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_taq.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_taq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a860f019dd0d259ea3fd9eddfb776870ea24b7f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_taq.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: taq_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_taq_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_tir.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..606755c5c59c01cdd1148437cdfccb4791ebc689
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tir.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_tir_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_tso.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c6b2e46369554e76573e3cdec7128b56d9853913
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_tso_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_tum.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9e17521fb63ca03a4b38747157cf0171dcb2cf13
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tum.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tum_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_tum_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_twi.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf818808af1a80bfa7cfad46b5f16930b5619636
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_twi_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_tzm.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tzm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10cf4c5b6626fe9ffc3addfbc8197156e23ee45f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_tzm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_tzm_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_umb.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_umb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d171c9c6b6fd7f2db5ac205e3adfed2e6e6fb867
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_umb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: umb_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_umb_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_wol.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3a6d7e6234c0ac9df872fb3cfcbc1e9f0e4f483
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_wol_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_xho.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..57ce4d2db833add543832e1798a90b7479d8a360
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_xho_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_yor.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cab811762f3b61828cb698857e0d46f33855f568
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_yor_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/sib_zul.yaml b/lm_eval/tasks/afrobench/sib/prompt_1/sib_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..694ddfc11f55e33b04544bde8a2004939e8bb158
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/sib_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: "Given the categories science/technology, travel, politics, sports, health,\
+  \ entertainment, or geography; what category does the text: '{{text}}' belong to:\
+  \ \n\n"
+include: sib
+task: sib_zul_prompt_1
diff --git a/lm_eval/tasks/afrobench/sib/prompt_1/utils.py b/lm_eval/tasks/afrobench/sib/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib b/lm_eval/tasks/afrobench/sib/prompt_2/sib
new file mode 100644
index 0000000000000000000000000000000000000000..27dd7d1f64838b9692fbaa06ea98c6cd7f7db97e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib
@@ -0,0 +1,43 @@
+tag:
+    - sib_tasks
+    - sib_prompt_2
+    - afrobench_TC_tasks
+dataset_path: Davlan/sib200
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: category
+doc_to_choice:
+    - "science/technology"
+    - "travel"
+    - "politics"
+    - "sports"
+    - "health"
+    - "entertainment"
+    - "geography"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_aeb.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_aeb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32b2443948fd04761dab4331d9421b50e6293397
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_aeb.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: aeb_Arab
+doc_to_text: 'Does this Tunisian Arabic topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_aeb_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_afr.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c212b13f1f2cf4cd9a2b5b70fce75429a2dbbd91
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_afr.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: 'Does this Afrikaans topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_afr_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_aka.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_aka.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dacfef07608dfebc67025bc8ff983260ee535f6b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_aka.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: aka_Latn
+doc_to_text: 'Does this Akan topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_aka_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_amh.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..259009f056b82ff8968feb9df082f7c232845124
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_amh.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: 'Does this Amharic topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_amh_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_ary.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..141a6691de71e7f932970dd3a73c91aa818c45b2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ary.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: 'Does this Moroccan Arabic topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_ary_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_arz.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2fee5eed9e22ca4448a5aa1efe26e756ed41562
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_arz.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: 'Does this Egyptian Arabic topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_arz_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_bam.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ae5ddd0ea44b3d4c9a90e40125b942cd1919d26
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_bam.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: 'Does this Bambara topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_bam_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_bem.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1631a349226b60b9de250b4f97db8e474094951e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_bem.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_text: 'Does this Bemba topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_bem_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_cjk.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_cjk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..85521f131a3532d7791bc3c022572bb624fd653c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_cjk.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: cjk_Latn
+doc_to_text: 'Does this Chokwe topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_cjk_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_dik.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_dik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c660516f42e0e869c8a266d113e65dcbbbf8f032
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_dik.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: dik_Latn
+doc_to_text: 'Does this Southwestern Dinka topic; ''{{text}}'' belong to one of the
+  following categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_dik_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_dyu.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_dyu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..698782fda2a65ea766eef9b91381d497949005ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_dyu.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: dyu_Latn
+doc_to_text: 'Does this Dyula topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_dyu_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_eng.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..564d86565f8aa47d9944d3a5aedc9555ca29c9a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_eng.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: 'Does this English topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_eng_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_ewe.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba064082941553b1177d6c4ea4901e6aa7ba61be
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ewe.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_text: 'Does this Ewe topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_ewe_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_fon.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9bb542dd84452cdd500d01a6e561c408e7a7fcf1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fon.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: fon_Latn
+doc_to_text: 'Does this Fon topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_fon_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_fra.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cf279d611378a2a1981415940a692389728fd339
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fra.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: 'Does this French topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_fra_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_fuv.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..50bb4b824748d070ad7d004efd15d2bab5cd8c0f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_fuv.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: 'Does this Nigerian Fulfulde topic; ''{{text}}'' belong to one of the
+  following categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_fuv_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_gaz.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..601d5f79f2605a3c0db8278500ecce1f5987222a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_gaz.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: 'Does this West Central Oromo topic; ''{{text}}'' belong to one of the
+  following categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_gaz_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_hau.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c7255d4747d4b5a033129bca1be441114bef36f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_hau.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: 'Does this Hausa topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_hau_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_ibo.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..714c132f655a0b57e9c160a513a6c73350c5919c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ibo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: 'Does this Igbo topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_ibo_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kab.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22303a3fbb1db518170ee57c258cff95c9f2c134
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kab.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kab_Latn
+doc_to_text: 'Does this Kabyle topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kab_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kam.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..498781d6e836f9854a7703f669e80b3a16003637
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kam.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kam_Latn
+doc_to_text: 'Does this Kamba topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_kam_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kbp.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kbp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..679d7ccd7a74430df674154fa03af065fc4e23a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kbp.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kbp_Latn
+doc_to_text: 'Does this Kabiye topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kbp_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kea.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aee33cf27faf2fed8b6873b601a5a437dae11bb7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kea.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: 'Does this Kabuverdianu topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kea_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kik.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..77c87bc131b912e0564156acf740cb5aa3007615
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kik.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kik_Latn
+doc_to_text: 'Does this Kikuyu topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kik_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kin.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5be0643e11f39513363994cc6bbc02ac1604f24c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kin.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: 'Does this Kinyarwanda topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kin_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kmb.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kmb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..02f4e9d22410d932c345df2eeb1b4de1c3e71c4b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kmb.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kmb_Latn
+doc_to_text: 'Does this Kimbundu topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kmb_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_knc.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_knc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2623c480235bbf269b082a6604af129bb82e7df4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_knc.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: knc_Latn
+doc_to_text: 'Does this Central Kanuri topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_knc_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_kon.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ec3bcf97652bde14ee764bf961ea49aca088df4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_kon.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kon_Latn
+doc_to_text: 'Does this Kikongo topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_kon_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_lin.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec2fa57a8bbc4309bbb44a865568a2cf70b842e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lin.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: 'Does this Lingala topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_lin_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_lua.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lua.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4f3acc3dbeb2b708257c9b5f1fcc7cac4a703d54
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lua.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lua_Latn
+doc_to_text: 'Does this Luba-Kasai topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_lua_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_lug.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d6e7b9f0c315b7868580704abc3cdba0775cc65
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_lug.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: 'Does this Luganda topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_lug_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_luo.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9d1a438594b1818e3fe34c9ce63e47e0c802e700
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_luo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: 'Does this Luo topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_luo_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_mos.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cc025905e76696e820804e4989e9b1bec2fa2257
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_mos.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: mos_Latn
+doc_to_text: 'Does this Mossi topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_mos_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_nso.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75021cc514b64598cd1e94902ecdd653db50681e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nso.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_text: 'Does this Northern Sotho topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_nso_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_nus.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abca40e85705feeaae8fb118ae9d162c611e0545
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nus.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: nus_Latn
+doc_to_text: 'Does this Nuer topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_nus_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_nya.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e09e27331ad6104a7c585f63638b4e24e9ba8880
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_nya.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: 'Does this Nyanja topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_nya_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_plt.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5b385cade643f329904a7a0dab0797e57433581
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_plt.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: 'Does this Plateau Malagasy topic; ''{{text}}'' belong to one of the
+  following categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_plt_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_por.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a297c05a4be4992f33fb07737bd704ab076c9cfe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_por.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: 'Does this Portuguese topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_por_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_run.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4bb32245653846c6eb82fef3716b31ba85adf4d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_run.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: run_Latn
+doc_to_text: 'Does this Rundi topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_run_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_sag.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sag.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..979b4d84e0dae472a83fdb64e7b62c35453763e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sag.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sag_Latn
+doc_to_text: 'Does this Sango topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_sag_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_sna.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b41184b3c702c2346484fb6a982a1f9a10fe6516
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sna.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: 'Does this Shona topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_sna_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_som.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cda1fb4133df8f42bf69a0e296b866cd128ef368
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_som.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: 'Does this Somali topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_som_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_sot.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08d0dbecbec823c712108c42d64f9a7cbed73463
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_sot.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: 'Does this Southern Sotho topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_sot_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_ssw.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d3b99e7a07affa07aeb7ad4452887808fbd47de
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_ssw.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: 'Does this Swazi topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_ssw_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_swa.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8e9faa831698a196843ae2f6b5f8cf4939bdada0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_swa.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: 'Does this Swahili topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_swa_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_taq.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_taq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1862c468c6c874e56fca81c9bbc0df09c91425e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_taq.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: taq_Latn
+doc_to_text: 'Does this Tamasheq topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_taq_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_tir.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..80dcc1bb3d8d6a65ea6dcdf75af3c946a803b071
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tir.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: 'Does this Tigrinya topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_tir_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_tso.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fad909b4b7a714ba44df14041808d54e2dc7edc2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tso.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: 'Does this Tsonga topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_tso_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_tum.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..613535bc95647f5edf818e844423eebad5291937
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tum.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tum_Latn
+doc_to_text: 'Does this Tumbuka topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_tum_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_twi.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..064edb4cb8e0e1a88dbc1ccfad20adefa13034e9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_twi.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: twi_Latn
+doc_to_text: 'Does this Twi topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_twi_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_tzm.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tzm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ec8adc260622a611261d30aebd49450d202b700
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_tzm.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng
+doc_to_text: 'Does this Tamazight topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_tzm_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_umb.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_umb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5a910abc5fa30c9462c950f059059f5f91554b3a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_umb.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: umb_Latn
+doc_to_text: 'Does this Umbundu topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_umb_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_wol.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4453b3458ecde8bbc26cff73793db71471301850
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_wol.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: 'Does this Wolof topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_wol_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_xho.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e038cc9458fbb376331296bd4dd1c96a5f26a8f1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_xho.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: 'Does this Xhosa topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_xho_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_yor.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e831b3117b828bb4ea68016ca19fdcd9c89525b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_yor.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: 'Does this Yoruba topic; ''{{text}}'' belong to one of the following
+  categories: science/technology, travel, politics, sports, health, entertainment,
+  or geography? category only
+
+
+  '
+include: sib
+task: sib_yor_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/sib_zul.yaml b/lm_eval/tasks/afrobench/sib/prompt_2/sib_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f61a4061f2b636167400491e59db890289aff3d3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/sib_zul.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: 'Does this Zulu topic; ''{{text}}'' belong to one of the following categories:
+  science/technology, travel, politics, sports, health, entertainment, or geography?
+  category only
+
+
+  '
+include: sib
+task: sib_zul_prompt_2
diff --git a/lm_eval/tasks/afrobench/sib/prompt_2/utils.py b/lm_eval/tasks/afrobench/sib/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib b/lm_eval/tasks/afrobench/sib/prompt_3/sib
new file mode 100644
index 0000000000000000000000000000000000000000..fed4e5c5019f791c72cfbe214efb2698943c5b92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib
@@ -0,0 +1,43 @@
+tag:
+    - sib_tasks
+    - sib_prompt_3
+    - afrobench_TC_tasks
+dataset_path: Davlan/sib200
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: category
+doc_to_choice:
+    - "science/technology"
+    - "travel"
+    - "politics"
+    - "sports"
+    - "health"
+    - "entertainment"
+    - "geography"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_aeb.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_aeb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b82cc4ec3cc8cff2dff2818f9238477ea12528a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_aeb.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: aeb_Arab
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tunisian Arabic statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_aeb_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_afr.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f818759646be15a6c6d1c0193a7b24deb730bb03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_afr.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Afrikaans statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_afr_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_aka.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_aka.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d4ff4e42cf1d7c926015812ea5c716928b697fc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_aka.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: aka_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Akan statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_aka_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_amh.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58207e9e39010c4f30d3ff0a1f46fd9e53f3b042
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_amh.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Amharic statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_amh_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_ary.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ccb9a06880d6aa946ffd750429da3fb650c46eea
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ary.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Moroccan Arabic statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_ary_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_arz.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19ebbed7b9a441cca520eff58a663354d29a7395
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_arz.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Egyptian Arabic statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_arz_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_bam.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2c1a18d9b3a4b076ca70b32981b2ecb58e3f9c0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_bam.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Bambara statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_bam_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_bem.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99750497c258c93b1393ced6f34b9a724fbe518d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_bem.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Bemba statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_bem_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_cjk.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_cjk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..470612b51b1f7d4821573764692a04f2a623a42f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_cjk.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: cjk_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Chokwe statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_cjk_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_dik.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_dik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5269b0262805239b807d789a7343dc0d1507a29
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_dik.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: dik_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Southwestern Dinka statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_dik_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_dyu.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_dyu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f04a1c17199e50ecebfac887459b3c3f124a1529
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_dyu.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: dyu_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Dyula statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_dyu_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_eng.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf22d08fcab877aae2ce77081274d1928e27f8c4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_eng.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the English statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_eng_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_ewe.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4cc991048ad19285b1dd269e91a6bb32898b6d88
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ewe.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Ewe statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_ewe_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_fon.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3127fde242956bcaf89224ed3a88be80dc967c52
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fon.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fon_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Fon statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_fon_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_fra.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a24ff30e4f6408f02a0f4a8978250d91e36621f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fra.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the French statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_fra_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_fuv.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..405838c78ddbe6a99d66f436699de00f0b7e814b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_fuv.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Nigerian Fulfulde statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_fuv_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_gaz.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..282b439a3c2d6703c04446b4cf477a9bd60bf340
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_gaz.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the West Central Oromo statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_gaz_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_hau.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..239181bf1f1586ac83aa1741e9cffe531c2433b1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_hau.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Hausa statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_hau_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_ibo.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0581291dd7ed5cf53e52fe2f44154363b8be9599
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ibo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Igbo statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_ibo_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kab.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32fbbf4407d07a5d78558b022260117f592645d7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kab.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kab_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kabyle statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kab_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kam.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f745ba54f9daaca1a3302443c4a5aba3de795f4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kam.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kam_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kamba statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kam_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kbp.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kbp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5be1035bb58862fe73c2678287620d409a76b87
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kbp.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kbp_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kabiye statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kbp_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kea.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a1d3e2a68cf8b465f589701f1004ae4b5dc07dd9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kea.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kabuverdianu statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kea_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kik.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..521a0f89226460e6f1a9e25c0d24066cd929c662
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kik.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kik_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kikuyu statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kik_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kin.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..066bfb630c59e334f65dcd74ea536a6790b3337d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kinyarwanda statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kin_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kmb.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kmb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c548af893d77f13231cab13318e396cbcf423388
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kmb.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kmb_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kimbundu statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kmb_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_knc.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_knc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9136823770a1e6754070143b2b0e40a988da22f4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_knc.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: knc_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Central Kanuri statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_knc_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_kon.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8777511ef33fad4019d3e157d1dbc4f6d0aad96
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_kon.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: kon_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Kikongo statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_kon_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_lin.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8688cb875fa5554625073325810a9dbb1198f06b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lin.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Lingala statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_lin_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_lua.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lua.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e71ac2aae77f40f06796c1572b2d38b44ec53962
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lua.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lua_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Luba-Kasai statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_lua_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_lug.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c3554267ebad03a604a4a3dcca369af535efb156
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_lug.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Luganda statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_lug_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_luo.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..161814d36bde3e61fe3cdf38e98d2ef62f6b9248
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_luo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Luo statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_luo_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_mos.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b80d5008087bf66a82b8b7855fc5b8c857497fe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_mos.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: mos_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Mossi statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_mos_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_nso.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5c9dd8bd3f8cd30ce172286ca005c16b0ead9214
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nso.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Northern Sotho statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_nso_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_nus.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..361698af10f6f231fbbdabf9e92a287504c45057
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nus.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nus_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Nuer statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_nus_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_nya.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c455c788ad7fc13a440885b6c7fc594ed4fc6e4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_nya.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Nyanja statement below? Return only the
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_nya_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_plt.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb90a034be0e94aba92823ba3b5762fc13eabe6f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_plt.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Plateau Malagasy statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_plt_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_por.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65b8c2da4ab91e1723ceabd2e9fb08d3b6de2cfe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_por.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Portuguese statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_por_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_run.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..19f3681cf856c7bc28bb1fcb5e8c31eda1f1b618
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_run.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: run_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Rundi statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_run_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_sag.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sag.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dfdcbd41929bc5747c34f18e93633fec8ac04e6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sag.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sag_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Sango statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_sag_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_sna.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f30ff0d2b995c831b4005318a18a723998c92aa8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sna.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Shona statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_sna_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_som.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea27fd2e1b298324e1e1abcff152b25cd9cfc3d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_som.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Somali statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_som_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_sot.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4ad477db4c912a09e317c7edc7818fe96b355f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_sot.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Southern Sotho statement below? Return\
+  \ only the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_sot_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_ssw.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25b7f85e1c955207a6afe5d154ed4286602a5313
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_ssw.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Swazi statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_ssw_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_swa.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7be0be9d211d0fce16493b3b62d593a7ad60b864
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_swa.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Swahili statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_swa_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_taq.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_taq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a7e7b3abbbbca622c1b56169a7abc6c917d9b241
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_taq.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: taq_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tamasheq statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_taq_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_tir.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aceb352596ba9fed4c4a3a544beb616923dca213
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tir.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tigrinya statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_tir_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_tso.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..025b7163c069a6291dc4691a34de44732cc4c8b7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tso.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tsonga statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_tso_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_tum.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..35092ea79435767ad3e4907e152273d2cd6f1dca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tum.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tum_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tumbuka statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_tum_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_twi.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc75f6579cdb57391a21bbce3a49fb062d7263f9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_twi.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: twi_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Twi statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_twi_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_tzm.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tzm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9b3044cdd08f81c95edfaeb8ded07d4a1da919d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_tzm.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Tamazight statement below? Return only\
+  \ the category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_tzm_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_umb.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_umb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d8bb8540180f44e612ea82e5276749d104362492
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_umb.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: umb_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Umbundu statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_umb_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_wol.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..115796d5276b8739efb6fccfb9064b5f4bb6a27e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_wol.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Wolof statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_wol_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_xho.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b61c84b700da4e798848d52ed2310c9cb5ee3467
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_xho.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Xhosa statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_xho_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_yor.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c5ccd0c738eb5d1d365e5d7342ebe4eaaf7686b8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_yor.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Yoruba statement below? Return only the\
+  \ category. \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_yor_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/sib_zul.yaml b/lm_eval/tasks/afrobench/sib/prompt_3/sib_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4915989dbbb0849b3345a80a420daa18a37eb97b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/sib_zul.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: "You are an assistant able to classify topics in texts. \n\nGiven the\
+  \ categories science/technology, travel, politics, sports, health, entertainment,\
+  \ or geography; what is the topic of the Zulu statement below? Return only the category.\
+  \ \n\ntext: {{text}} \\category:\n\n"
+include: sib
+task: sib_zul_prompt_3
diff --git a/lm_eval/tasks/afrobench/sib/prompt_3/utils.py b/lm_eval/tasks/afrobench/sib/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib b/lm_eval/tasks/afrobench/sib/prompt_4/sib
new file mode 100644
index 0000000000000000000000000000000000000000..28ed8f4a0da4e25815ebcfa6e58092a382e1708e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib
@@ -0,0 +1,43 @@
+tag:
+    - sib_tasks
+    - sib_prompt_4
+    - afrobench_TC_tasks
+dataset_path: Davlan/sib200
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: category
+doc_to_choice:
+    - "science/technology"
+    - "travel"
+    - "politics"
+    - "sports"
+    - "health"
+    - "entertainment"
+    - "geography"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_aeb.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_aeb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e8c737f278122c8893e028ea2334ff93646a73cf
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_aeb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aeb_Arab
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_aeb_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_afr.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7139d04e9a5b4a4865ba11941d3078802cc9a85c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_afr.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_afr_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_aka.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_aka.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..59c8c56a6b78ffbb0da5e3b3abeb24ccd13b35d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_aka.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: aka_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_aka_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_amh.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cec6b6c43425195e36be88cbdc266d8806844a24
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_amh.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_amh_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_ary.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2c10743470b814bf689bfef10410af3b4e03bb84
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ary.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_ary_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_arz.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1740975a66196d9c4c3bd6780ad50281766cb0b2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_arz.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_arz_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_bam.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33ee240e6d95a0e43426b514f5e33f696526faeb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_bam.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_bam_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_bem.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aa5608e849606f6d63f371b1bd7362d355b7d42b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_bem.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_bem_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_cjk.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_cjk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52e08d7b8c5dc10d6c35a5b4fa4deee9b494f2d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_cjk.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: cjk_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_cjk_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_dik.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_dik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c8db6013f1d2a63d8242ac59d55c3f006f01e660
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_dik.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dik_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_dik_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_dyu.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_dyu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9bbc0b547f3b25150eb000d4e56bb6e24e86991
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_dyu.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: dyu_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_dyu_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_eng.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c84749120e002dee47446d05600d81ed14bc193
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_eng.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_eng_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_ewe.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..02e7ea822fee11a3d0b3869ef3ea493048a114da
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ewe.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_ewe_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_fon.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67053ed8cd739682270062acea206792a7df5679
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fon.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fon_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_fon_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_fra.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2b858ce4e502334f8440c8551b1bcd10feb3b15
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fra.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_fra_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_fuv.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0c73f82679a48664a468f8e36432a0e33399190c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_fuv.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_fuv_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_gaz.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba95ef5d8ee884a65befdab1a83853686d8b8ef5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_gaz.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_gaz_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_hau.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d53794868c164768810226db74aab7f06ccb383
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_hau.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_hau_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_ibo.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2683d98dba0ca644b5314ef96a1359571a83fe9d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ibo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_ibo_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kab.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9f645a4598e2de1ccc45de14274a882b26deceb7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kab.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kab_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kab_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kam.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f035b89505f2f6ef889addc2af1c972efc8ff2d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kam.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kam_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kam_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kbp.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kbp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c65b6352e1dd12d2a7d511825f9c043ad213aebc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kbp.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kbp_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kbp_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kea.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0e7bba4ae7a6c359568f6252edaebd0bee96c860
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kea.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kea_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kik.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..06480d183bb1d2765f8d23e8dda80ee6c37c029e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kik.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kik_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kik_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kin.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b447219fb3bafaf2a81e3ac727e5216f408893f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kin_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kmb.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kmb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5fc51890964f59d20f53b06cf3ddbdb02b444471
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kmb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kmb_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kmb_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_knc.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_knc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..326443318488c921765094a96c84adb8e208eda8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_knc.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: knc_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_knc_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_kon.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6da4ab390d0e433391e313dea2c82d302d090dd2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_kon.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: kon_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_kon_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_lin.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51076dbd56131d82389d83bf2b12a224ef6c5443
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lin.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_lin_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_lua.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lua.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95973f7d5ce8309ee595ce4e751d564851796923
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lua.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lua_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_lua_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_lug.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a570b58488496b4a73ca0fe46a2210c95b470bb2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_lug.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_lug_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_luo.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..76d799856b98c4f78804815fd5bd86dd415a100d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_luo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_luo_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_mos.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aeb058ac584c862898e7acf7441aa76b8c123709
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_mos.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: mos_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_mos_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_nso.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f76e016a6bcdda14daf24179da982f696732a199
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_nso_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_nus.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..255c1861589e185c2bcdb3f1d9f679ed26837be0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nus.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nus_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_nus_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_nya.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bc7a48abf7f86cdea8f07d54a3a166ffe9550f06
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_nya.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_nya_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_plt.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..880c3d03ef5be9db726024243158f283c2013861
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_plt.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_plt_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_por.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16a258365d56b100c44fd269da27413fd3bffa83
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_por.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_por_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_run.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a97737edf4b9ab31a53749d109359c8acb3d3f4f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_run.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: run_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_run_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_sag.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sag.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8c6897795ec414f37037eb2b79e6ffb6e3124ed7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sag.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sag_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_sag_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_sna.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da13a6ecf2b11650068be21b5b28ff470aba9002
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sna.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_sna_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_som.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b6c35f3cb4a25a3ddecda9d4b1dc8528fce64d1f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_som.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_som_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_sot.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1385e058deaf19b0cdf272a768f260997d7cae92
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_sot.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_sot_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_ssw.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d678c12422ff05f87849f934daf402454fa3415e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_ssw.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_ssw_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_swa.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7492cfa329f48c959f6255ffbb879d952fcbe200
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_swa.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_swa_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_taq.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_taq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..783be833f8c77c16a82948f4055162941723849f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_taq.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: taq_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_taq_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_tir.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..931ede568a3faa337637be06bccfd9ca136d8bc2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tir.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_tir_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_tso.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc4c0f1a3278259574fc84fd09be60422174b871
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_tso_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_tum.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c099dc6fd73bdad8c8e9e4d306cee8d9dec243fa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tum.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tum_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_tum_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_twi.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00501281a217c4d68834e8a1cd6dec9463e87268
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_twi.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: twi_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_twi_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_tzm.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tzm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3915fa18f2c7963e0b1a0f4f10ca1da87f765141
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_tzm.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_tzm_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_umb.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_umb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a7f1cc79736bfba50fb8ab03c8749a77523feec4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_umb.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: umb_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_umb_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_wol.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fc2440248af154acd2aecdfc6d341230d4bfa67a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_wol.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_wol_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_xho.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e075b84c190d949ad8e177b06445e0136f0445d0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_xho.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_xho_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_yor.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41ef062098ebc4b54b7aec5da59851d490924e6c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_yor.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_yor_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/sib_zul.yaml b/lm_eval/tasks/afrobench/sib/prompt_4/sib_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7fc2f85efc9f9799446133ac107b6b0d66cfb38b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/sib_zul.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: "Label the following text as science/technology, travel, politics, sports,\
+  \ health, entertainment, or geography. Provide only the category as your response.\
+  \ \n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_zul_prompt_4
diff --git a/lm_eval/tasks/afrobench/sib/prompt_4/utils.py b/lm_eval/tasks/afrobench/sib/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib b/lm_eval/tasks/afrobench/sib/prompt_5/sib
new file mode 100644
index 0000000000000000000000000000000000000000..812df7f614a9c8146b6da3137f4c2e97049b07f2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib
@@ -0,0 +1,43 @@
+tag:
+    - sib_tasks
+    - sib_prompt_5
+    - afrobench_TC_tasks
+dataset_path: Davlan/sib200
+dataset_name: null
+output_type: multiple_choice
+validation_split: validation
+test_split: test
+fewshot_split: validation
+doc_to_target: category
+doc_to_choice:
+    - "science/technology"
+    - "travel"
+    - "politics"
+    - "sports"
+    - "health"
+    - "entertainment"
+    - "geography"
+should_decontaminate: true
+doc_to_decontamination_query: text
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # alternative: "aggregation: mean" (disabled in favour of utils.weighted_f1_score)
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_aeb.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_aeb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3c88c0a28bdd40981ba847762e3cc08b36e66690
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_aeb.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: aeb_Arab
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tunisian Arabic text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_aeb_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_afr.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_afr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d585478be65a678629dd3718c9e44f25a42b5e6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_afr.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: afr_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Afrikaans text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_afr_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_aka.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_aka.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4644bfa3c9c923d544f86f13b273c5f754b236f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_aka.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: aka_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Akan text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_aka_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_amh.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_amh.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f2b5e6f9223f1b20d1d02b5f27635a0684388744
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_amh.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: amh_Ethi
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Amharic text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_amh_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_ary.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ary.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..348c849d219b06501167c51182458f1946f51439
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ary.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ary_Arab
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Moroccan Arabic text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_ary_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_arz.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_arz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..100570428142a0f35ec728558251e96ec484ccb5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_arz.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: arz_Arab
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Egyptian Arabic text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_arz_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_bam.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_bam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bdc655003fc959e8219ae681b64b82dd137853d1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_bam.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: bam_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Bambara text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_bam_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_bem.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_bem.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d42ea873b83e69d3d3d621a9ba1fafd7a88a4ab3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_bem.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: bem_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Bemba text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_bem_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_cjk.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_cjk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9623b8c52bb39ab7dfa0f743d5482169a462b7ab
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_cjk.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: cjk_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Chokwe text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_cjk_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_dik.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_dik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83e76e963fe2962b472b8312d35a79c4e14d2b55
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_dik.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: dik_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Southwestern Dinka text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_dik_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_dyu.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_dyu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ab215e89f959e6edf4bd07d1729f0424e85e0a8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_dyu.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: dyu_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Dyula text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_dyu_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_eng.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_eng.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a17a006d21d32ce9901010cbdcd94aad3af933f4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_eng.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: eng_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ English text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_eng_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_ewe.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ewe.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..195876998160addea6184dbc4d3375192068aec5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ewe.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ewe_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Ewe text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_ewe_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_fon.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..61980b5110a424ed7391b29dbedf7f7828563f03
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fon.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: fon_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Fon text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_fon_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_fra.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..29573054bcfa08ddf225f925cb6131b6d4909163
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fra.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: fra_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ French text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_fra_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_fuv.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fuv.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3b48f9f4e09e7042dfdec6ffa224300ee824b580
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_fuv.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: fuv_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Nigerian Fulfulde text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_fuv_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_gaz.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_gaz.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..37e2a4f97793217ecbd9f8c551d585901366cd34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_gaz.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: gaz_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ West Central Oromo text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_gaz_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_hau.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_hau.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24ce0970e9923725e2255abe09dc6e1629c9d23f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_hau.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: hau_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Hausa text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_hau_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_ibo.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ibo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a39ee75cb90a18ddb12bbedc54fd82a4c4c45ded
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ibo.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ibo_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Igbo text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_ibo_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kab.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kab.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d29da033388668302aca70c3511d52377e8797d9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kab.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kab_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kabyle text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kab_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kam.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kam.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e55d1218586efb9f8d0b9bad3e9c2c76727a74b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kam.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kam_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kamba text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kam_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kbp.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kbp.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..210baea8685a19c4e6bee6bcd141f2f0cb2a101a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kbp.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kbp_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kabiye text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kbp_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kea.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kea.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..34a6813c8eff67f57adff7f43c982006b431ccf6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kea.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kea_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kabuverdianu text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kea_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kik.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kik.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55fdcb00e3f003422315e8de2ef64c8aa9e0abbc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kik.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kik_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kikuyu text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kik_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kin.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6567d52bf1f47beaba19141ab2f95b8168298290
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kin.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kin_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kinyarwanda text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kin_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kmb.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kmb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4ae05cd06aef47942e20bbf72092cd23a5b4fb2f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kmb.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kmb_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kimbundu text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kmb_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_knc.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_knc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9870bd64740561b60f69af908c01ab221585d3fc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_knc.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: knc_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Central Kanuri text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_knc_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_kon.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kon.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afcab8b8dd34cfa63b1653223c05170789dddc10
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_kon.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: kon_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Kikongo text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_kon_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_lin.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c1611fd77106ae1eeef87ff5fcce60221ef8039
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lin.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lin_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Lingala text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_lin_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_lua.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lua.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b3b2b9edcb68cf91b5c63273e07bedc936a7ffe1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lua.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lua_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Luba-Kasai text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_lua_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_lug.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lug.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8ca880ace46c5220acdcf2fb5d47bf37e2791aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_lug.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: lug_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Luganda text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_lug_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_luo.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_luo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b942d69d0cf5a367213aca5cc437b26015c83ae3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_luo.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: luo_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Luo text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_luo_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_mos.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_mos.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..daccd62e9345db0c0625c7a26df993fe4f528411
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_mos.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: mos_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Mossi text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_mos_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_nso.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09936e3c333c1cb2c285fc056d6da25246bcfefe
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nso.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: nso_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Northern Sotho text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_nso_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_nus.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nus.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5f8e101910ba130d16708ce63388b4808286236
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nus.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: nus_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Nuer text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_nus_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_nya.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65737777ba6914aa1a735a260ee7ce7e3bfa9754
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_nya.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: nya_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Nyanja text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_nya_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_plt.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_plt.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..24f6ea33e114f92a89b2b08581c3e2d93985362f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_plt.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: plt_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Plateau Malagasy text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_plt_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_por.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_por.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d98ee118637f21a2ff1ffa30cd84099327965cbc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_por.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: por_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Portuguese text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_por_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_run.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_run.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..01820da52cbd3b30dfe1c65a052b47ce1af0c7c0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_run.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: run_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Rundi text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_run_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_sag.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sag.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdabdcb63ca35cc8fd419558911099c7d8f14877
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sag.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: sag_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Sango text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_sag_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_sna.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sna.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3d66f53a7736d27346e47675426abfd4b63b6388
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sna.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: sna_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Shona text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_sna_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_som.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_som.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0c34f97d20b7c7e4b7be5b7225bf6a91baec3e0
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_som.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: som_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Somali text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_som_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_sot.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sot.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81ab5c3f7e66f457d59edef55eb79c693c18913d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_sot.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: sot_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Southern Sotho text. For each input, classify the topic as science/technology,\
+  \ travel, politics, sports, health, entertainment, or geography. Use the following\
+  \ guidelines: \n\n science/technology: The text discusses scientific discoveries,\
+  \ technological advancements, or related topics. \ntravel: The text describes travel\
+  \ experiences, destinations, or related topics. \npolitics: The text covers political\
+  \ events, policies, or related topics. \nsports: The text talks about sports events,\
+  \ athletes, or related topics. \nhealth: The text addresses health issues, medical\
+  \ advancements, or related topics. \nentertainment: The text pertains to movies,\
+  \ music, celebrities, or related topics. \ngeography: The text involves geographical\
+  \ information, locations, or related topics. \n\nIf the text contains multiple topics,\
+  \ choose the dominant topic. For ambiguous or unclear topics, select the category\
+  \ that best reflects the overall content. Please provide a single classification\
+  \ for each input.\n\ntext: {{text}} \\category: \n\n"
+include: sib
+task: sib_sot_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_ssw.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ssw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f662d2ab44cebe8ec184c7864207b9ceafa95f58
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_ssw.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: ssw_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Swazi text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_ssw_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_swa.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_swa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ee47ca51598f6455804e6e6cad3fb1ca1cacc4d6
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_swa.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: swh_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Swahili text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_swa_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_taq.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_taq.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3fa1380df4b219fae78d9af94069a3becc256832
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_taq.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: taq_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tamasheq text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_taq_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_tir.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tir.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20ec0638837c561c45b0267d47a2a7481a3e9ec7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tir.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tir_Ethi
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tigrinya text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_tir_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_tso.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44b3b867a796111bbcfe2d295ff5c04435878208
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tso.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tso_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tsonga text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_tso_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_tum.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bb15fb71e821e69e255b36183d2273a75292fd60
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tum.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tum_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tumbuka text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_tum_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_twi.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_twi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..44bca6194bc19417067ce10659958d0c5993ad87
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_twi.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: twi_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Twi text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_twi_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_tzm.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tzm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5d1af77d17ccc2e287c4e598b0094383ec5e4b01
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_tzm.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: tzm_Tfng
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Tamazight text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_tzm_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_umb.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_umb.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a756680cfa29d6b4c83363192d734196989b2d45
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_umb.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: umb_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Umbundu text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_umb_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_wol.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_wol.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8062b55d71066a62df167f96c8e6a72b67e51b60
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_wol.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: wol_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Wolof text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_wol_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_xho.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_xho.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22c27b71a7e08c4f9c58039f5f52a10df324a878
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_xho.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: xho_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Xhosa text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_xho_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_yor.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_yor.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..df51978255654580b37eca4e552e4561a6455e69
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_yor.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: yor_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Yoruba text. For each input, classify the topic as science/technology, travel,\
+  \ politics, sports, health, entertainment, or geography. Use the following guidelines:\
+  \ \n\n science/technology: The text discusses scientific discoveries, technological\
+  \ advancements, or related topics. \ntravel: The text describes travel experiences,\
+  \ destinations, or related topics. \npolitics: The text covers political events,\
+  \ policies, or related topics. \nsports: The text talks about sports events, athletes,\
+  \ or related topics. \nhealth: The text addresses health issues, medical advancements,\
+  \ or related topics. \nentertainment: The text pertains to movies, music, celebrities,\
+  \ or related topics. \ngeography: The text involves geographical information, locations,\
+  \ or related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_yor_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/sib_zul.yaml b/lm_eval/tasks/afrobench/sib/prompt_5/sib_zul.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..03fb9af917049b8dac781d9aefac58ebd3fe4dba
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/sib_zul.yaml
@@ -0,0 +1,18 @@
+# Generated by utils.py
+dataset_name: zul_Latn
+doc_to_text: "You are tasked with performing topic classification on the following\
+  \ Zulu text. For each input, classify the topic as science/technology, travel, politics,\
+  \ sports, health, entertainment, or geography. Use the following guidelines: \n\n\
+  \ science/technology: The text discusses scientific discoveries, technological advancements,\
+  \ or related topics. \ntravel: The text describes travel experiences, destinations,\
+  \ or related topics. \npolitics: The text covers political events, policies, or\
+  \ related topics. \nsports: The text talks about sports events, athletes, or related\
+  \ topics. \nhealth: The text addresses health issues, medical advancements, or related\
+  \ topics. \nentertainment: The text pertains to movies, music, celebrities, or related\
+  \ topics. \ngeography: The text involves geographical information, locations, or\
+  \ related topics. \n\nIf the text contains multiple topics, choose the dominant\
+  \ topic. For ambiguous or unclear topics, select the category that best reflects\
+  \ the overall content. Please provide a single classification for each input.\n\n\
+  text: {{text}} \\category: \n\n"
+include: sib
+task: sib_zul_prompt_5
diff --git a/lm_eval/tasks/afrobench/sib/prompt_5/utils.py b/lm_eval/tasks/afrobench/sib/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/sib/sib.yaml b/lm_eval/tasks/afrobench/sib/sib.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6935fee28978fd5f7efb02afd1a54dac363d111
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/sib.yaml
@@ -0,0 +1,13 @@
+group: sib
+task:
+  - sib_prompt_1
+  - sib_prompt_2
+  - sib_prompt_3
+  - sib_prompt_4
+  - sib_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/sib/utils.py b/lm_eval/tasks/afrobench/sib/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d99649e343fa4c491c77cb3167c89cc09907f579
--- /dev/null
+++ b/lm_eval/tasks/afrobench/sib/utils.py
@@ -0,0 +1,227 @@
+import argparse
+import os
+
+import yaml
+
+
+class FunctionTag:  # Plain holder for a single value; nothing else in this module's code references it.
+    def __init__(self, value):
+        self.value = value  # kept exactly as passed in (no validation, no copy)
+
+
+def prompt_func(mode, lang):
+    """
+    Return the `doc_to_text` prompt template for variant `mode`; `lang` is the language name used by prompt_2/3/5.
+    """
+    prompt_map = {
+        "prompt_1": "Given the categories science/technology, travel, politics, sports, health, entertainment, or geography; what category does the text: '{{text}}' belong to: \n\n",
+        "prompt_2": f"Does this {lang} topic; "
+        "'{{text}}' belong to one of the following categories: science/technology, travel, politics, sports, health, entertainment, or geography? category only\n\n",
+        "prompt_3": "You are an assistant able to classify topics in texts. \n\nGiven the categories science/technology, travel, politics, sports, health, entertainment, or geography; what is "
+        f"the topic of the {lang} statement below? Return only the category. "
+        "\n\ntext: {{text}} \\category:\n\n",  # "\\c": same text as the old invalid escape "\c", minus the SyntaxWarning
+        "prompt_4": "Label the following text as science/technology, travel, politics, sports, health, entertainment, or geography. Provide only the category as your "
+        "response. \n\ntext: {{text}} \\category: \n\n",
+        "prompt_5": f"You are tasked with performing topic classification on the following {lang} text. "
+        "For each input, classify the topic as science/technology, travel, politics, sports, health, entertainment, or geography. Use the following guidelines: \n\n "
+        "science/technology: The text discusses scientific discoveries, technological advancements, or related topics. \n"
+        "travel: The text describes travel experiences, destinations, or related topics. \n"
+        "politics: The text covers political events, policies, or related topics. \n"
+        "sports: The text talks about sports events, athletes, or related topics. \n"
+        "health: The text addresses health issues, medical advancements, or related topics. \n"
+        "entertainment: The text pertains to movies, music, celebrities, or related topics. \n"
+        "geography: The text involves geographical information, locations, or related topics. \n\n"
+        "If the text contains multiple topics, choose the dominant topic. For ambiguous or unclear topics, select the category that best reflects the overall content. "
+        "Please provide a single classification for each input.\n\ntext: {{text}} \\category: \n\n",
+    }
+    return prompt_map[mode]  # an unknown mode surfaces as KeyError
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant key (e.g. "prompt_1"); it names the sub-directory,
+        the task-name suffix and the prompt template that is written out.
+    :raises FileExistsError: If ``overwrite`` is false and a target file already exists.
+    """
+    err = []
+    languages = {
+        "aeb": "Tunisian Arabic",
+        "afr": "Afrikaans",
+        "aka": "Akan",
+        "amh": "Amharic",
+        "ary": "Moroccan Arabic",
+        "arz": "Egyptian Arabic",
+        "bam": "Bambara",
+        "bem": "Bemba",
+        "cjk": "Chokwe",
+        "dik": "Southwestern Dinka",
+        "dyu": "Dyula",
+        "eng": "English",
+        "ewe": "Ewe",
+        "fon": "Fon",
+        "fra": "French",
+        "fuv": "Nigerian Fulfulde",
+        "gaz": "West Central Oromo",
+        "hau": "Hausa",
+        "ibo": "Igbo",
+        "kab": "Kabyle",
+        "kam": "Kamba",
+        "kmb": "Kimbundu",
+        "kbp": "Kabiye",
+        "kea": "Kabuverdianu",
+        "kik": "Kikuyu",
+        "kin": "Kinyarwanda",
+        "kon": "Kikongo",
+        "knc": "Central Kanuri",
+        "lua": "Luba-Kasai",
+        "lug": "Luganda",
+        "luo": "Luo",
+        "lin": "Lingala",
+        "mos": "Mossi",
+        "nus": "Nuer",
+        "nso": "Northern Sotho",
+        "nya": "Nyanga",  # NOTE(review): dataset code nya_Latn suggests "Nyanja" (Chichewa) - confirm before renaming
+        "plt": "Plateau Malagasy",
+        "por": "Portuguese",
+        "run": "Rundi",
+        "sag": "Sango",
+        "sna": "Shona",
+        "som": "Somali",
+        "sot": "Southern Sotho",
+        "ssw": "Swazi",
+        "swa": "Swahili",
+        "taq": "Tamasheq",
+        "tir": "Tigrinya",
+        "tum": "Tumbuka",
+        "tso": "Tsonga",
+        "twi": "Twi",
+        "tzm": "Tamazight",
+        "umb": "Umbundu",
+        "wol": "Wolof",
+        "xho": "Xhosa",
+        "yor": "Yoruba",
+        "zul": "Zulu",
+    }
+
+    lang_2_dataset_lang_code = {
+        "aeb": "aeb_Arab",
+        "afr": "afr_Latn",
+        "aka": "aka_Latn",
+        "amh": "amh_Ethi",
+        "ary": "ary_Arab",
+        "arz": "arz_Arab",
+        "bam": "bam_Latn",
+        "bem": "bem_Latn",
+        "cjk": "cjk_Latn",
+        "dik": "dik_Latn",
+        "dyu": "dyu_Latn",
+        "eng": "eng_Latn",
+        "ewe": "ewe_Latn",
+        "fon": "fon_Latn",
+        "fra": "fra_Latn",
+        "fuv": "fuv_Latn",
+        "gaz": "gaz_Latn",
+        "hau": "hau_Latn",
+        "ibo": "ibo_Latn",
+        "kab": "kab_Latn",
+        "kam": "kam_Latn",
+        "kmb": "kmb_Latn",
+        "kbp": "kbp_Latn",
+        "kea": "kea_Latn",
+        "kik": "kik_Latn",
+        "kin": "kin_Latn",
+        "kon": "kon_Latn",
+        "knc": "knc_Latn",
+        "lua": "lua_Latn",
+        "lug": "lug_Latn",
+        "luo": "luo_Latn",
+        "lin": "lin_Latn",
+        "mos": "mos_Latn",
+        "nus": "nus_Latn",
+        "nso": "nso_Latn",
+        "nya": "nya_Latn",
+        "plt": "plt_Latn",
+        "por": "por_Latn",
+        "run": "run_Latn",
+        "sag": "sag_Latn",
+        "sna": "sna_Latn",
+        "som": "som_Latn",
+        "sot": "sot_Latn",
+        "ssw": "ssw_Latn",
+        "swa": "swh_Latn",
+        "taq": "taq_Latn",
+        "tir": "tir_Ethi",
+        "tum": "tum_Latn",
+        "tso": "tso_Latn",
+        "twi": "twi_Latn",
+        "tzm": "tzm_Tfng",
+        "umb": "umb_Latn",
+        "wol": "wol_Latn",
+        "xho": "xho_Latn",
+        "yor": "yor_Latn",
+        "zul": "zul_Latn",
+    }
+
+    # The target directory is the same for every language, so create it once up front.
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+
+    for lang, lang_name in languages.items():
+        file_name = f"sib_{lang}.yaml"
+        yaml_details = {
+            "include": "sib",
+            "task": f"sib_{lang}_{mode}",
+            "dataset_name": lang_2_dataset_lang_code[lang],
+            "doc_to_text": prompt_func(mode, lang_name),
+        }
+        try:
+            # Mode "x" makes open() fail on an existing file when not overwriting.
+            with open(
+                os.path.join(target_dir, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            # Keep going so that every clashing file is reported in one error below.
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action=argparse.BooleanOptionalAction,
+        help="Overwrite files if they already exist (pass --no-overwrite to refuse)",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+    parser.add_argument(
+        "--mode",
+        default="prompt_3",
+        choices=["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"],
+        help="Prompt number",
+    )
+    args = parser.parse_args()
+    # store_true with default=True could never be switched off; the optional action keeps the default and adds --no-overwrite.
+    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/README.md b/lm_eval/tasks/afrobench/uhura-arc-easy/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a0253f987e3723c309bcb5ce4c9a9ad2b3a166ec
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/README.md
@@ -0,0 +1,25 @@
+# Uhura-ARC-Easy
+
+## Paper
+Title: `Uhura: A Benchmark for Evaluating Scientific Question Answering and Truthfulness in Low-Resource African Languages`
+
+Paper Link: https://arxiv.org/abs/2412.00948
+
+## Abstract
+>Evaluations of Large Language Models (LLMs) on knowledge-intensive tasks and factual accuracy often focus on high-resource languages primarily because datasets for low-resource languages (LRLs) are scarce. In this paper, we present Uhura -- a new benchmark that focuses on two tasks in six typologically-diverse African languages, created via human translation of existing English benchmarks. The first dataset, Uhura-ARC-Easy, is composed of multiple-choice science questions. The second, Uhura-TruthfulQA, is a safety benchmark testing the truthfulness of models on topics including health, law, finance, and politics. We highlight the challenges creating benchmarks with highly technical content for LRLs and outline mitigation strategies. Our evaluation reveals a significant performance gap between proprietary models such as GPT-4o and o1-preview, and Claude models, and open-source models like Meta's LLaMA and Google's Gemma. Additionally, all models perform better in English than in African languages. These results indicate that LMs struggle with answering scientific questions and are more prone to generating false claims in low-resource African languages. Our findings underscore the necessity for continuous improvement of multilingual LM capabilities in LRL settings to ensure safe and reliable use in real-world contexts. We open-source the Uhura Benchmark and Uhura Platform to foster further research and development in NLP for LRLs.
+
+HomePage: https://huggingface.co/datasets/masakhane/uhura-arc-easy
+
+### Citation
+
+```
+@misc{bayes2024uhurabenchmarkevaluatingscientific,
+      title={Uhura: A Benchmark for Evaluating Scientific Question Answering and Truthfulness in Low-Resource African Languages},
+      author={Edward Bayes and Israel Abebe Azime and Jesujoba O. Alabi and Jonas Kgomo and Tyna Eloundou and Elizabeth Proehl and Kai Chen and Imaan Khadir and Naome A. Etori and Shamsuddeen Hassan Muhammad and Choice Mpanza and Igneciah Pocia Thete and Dietrich Klakow and David Ifeoluwa Adelani},
+      year={2024},
+      eprint={2412.00948},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2412.00948},
+}
+```
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy
new file mode 100644
index 0000000000000000000000000000000000000000..a7e37181359d9021a3dbb669c42a0e80e0b36c8f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy
@@ -0,0 +1,39 @@
+tag:
+    - uhura_arc_easy_tasks
+    - uhura_arc_easy_prompt_1
+task: null
+dataset_path: masakhane/uhura-arc-easy
+dataset_name: null
+output_type: multiple_choice
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answerKey)}}"
+doc_to_choice:
+  - A
+  - B
+  - C
+  - D
+test_split: test
+fewshot_split: validation
+should_decontaminate: false
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_am.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f61efe4ea501ae6ee9c7553a05bdb7d8540c08f7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_am.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: am_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_am_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_en.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f1b879e0221b72d2da47f4fe033f83f20526fef2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_en.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: en_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_en_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_ha.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..986ac5074660ef3c9756e1112e8aa5f34eafefe2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_ha.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: ha_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_ha_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_nso.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ead6d97d67ec6f76e5378899e979413b6f6bb41b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_nso.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: nso_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_nso_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_sw.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e07bb234736d67e91cbcf798d6992fda2f438ec
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_sw.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: sw_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_sw_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_yo.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f96113e4a5d3712fec89aab27fb309c0b85551b3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_yo.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: yo_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_yo_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_zu.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..41c965a071018685e0539ae8ee0f18389d4a0d01
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/uhura-arc-easy_zu.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: zu_multiple_choice
+doc_to_text: "You are a virtual assistant that answers multiple-choice questions with\
+  \ the correct option only.\n\nQuestion: {{question}}\n\nChoices:\n\n{% for i in\
+  \ range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n\
+  {% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_zu_prompt_1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_1/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy
new file mode 100644
index 0000000000000000000000000000000000000000..295d9c8e907f841f74f8e9b7253d52c6eee2b224
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy
@@ -0,0 +1,38 @@
+tag:
+    - uhura_arc_easy_tasks
+    - uhura_arc_easy_prompt_2
+dataset_path: masakhane/uhura-arc-easy
+dataset_name: null
+output_type: multiple_choice
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answerKey)}}"
+doc_to_choice:
+  - A
+  - B
+  - C
+  - D
+test_split: test
+fewshot_split: validation
+should_decontaminate: false
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_am.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2596bd487078859572e018d19f6f39c5e32f3dc5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_am.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: am_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_am_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_en.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f3edfc10ea1ada6252d938e57aed3a1f03ade802
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_en.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: en_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_en_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_ha.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d857b2e44c5fb9a7da2ac1e8dfbb426978799073
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_ha.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ha_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_ha_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_nso.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..93fbfe587dc9dfb45c08aaaeb3c6c3528d766110
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_nso.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nso_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_nso_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_sw.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5fc929f54de32f0010844d2ed816cd3b634184f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_sw.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sw_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_sw_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_yo.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67b09752a4491d345d61bbbba5219cc1f5001544
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_yo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yo_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_yo_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_zu.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b261b51fcc11770a22eaf0bd8285b2028d416b9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/uhura-arc-easy_zu.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zu_multiple_choice
+doc_to_text: "Choose the correct option that answers the question below:\n\nQuestion:\
+  \ {{question}}\n\nChoices:\n\n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i]\
+  \ }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_zu_prompt_2
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_2/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy
new file mode 100644
index 0000000000000000000000000000000000000000..23e2c37396c75ce09fcd608e69d5ce42173df1ca
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy
@@ -0,0 +1,38 @@
+tag:
+    - uhura_arc_easy_tasks
+    - uhura_arc_easy_prompt_3
+dataset_path: masakhane/uhura-arc-easy
+dataset_name: null
+output_type: multiple_choice
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answerKey)}}"
+doc_to_choice:
+  - A
+  - B
+  - C
+  - D
+test_split: test
+fewshot_split: validation
+should_decontaminate: false
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_am.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..42716a7cdc9b1ac266efbec32ebe4bebe6bf578e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_am.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: am_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_am_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_en.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a89312e09a10b9db5a2a9a2a0914980c9ef686a5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_en.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: en_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_en_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_ha.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de511a8af7dc52180b56d52a1c7d57955cbd6eb4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_ha.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ha_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_ha_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_nso.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..358d084cea2131f7e94f82ec733234371f2b8446
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_nso.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nso_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_nso_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_sw.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4a8785d622caed597dbb0eefbd4290f2636f866
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_sw.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sw_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_sw_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_yo.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9416362827513ac4b95cf843a83ec0d0efbc45e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_yo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yo_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_yo_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_zu.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a44b8c0e6ebae36db00a6847aacb3263c84fb7b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/uhura-arc-easy_zu.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zu_multiple_choice
+doc_to_text: "Answer the following multiple-choice question by picking 'A', 'B', 'C',\
+  \ or 'D'.\n\nQuestion: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_zu_prompt_3
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_3/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy
new file mode 100644
index 0000000000000000000000000000000000000000..e697f4c7363aee6c39b0d927ba9d1b575f4063d5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy
@@ -0,0 +1,38 @@
+tag:
+    - uhura_arc_easy_tasks
+    - uhura_arc_easy_prompt_4
+dataset_path: masakhane/uhura-arc-easy
+dataset_name: null
+output_type: multiple_choice
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answerKey)}}"
+doc_to_choice:
+  - A
+  - B
+  - C
+  - D
+test_split: test
+fewshot_split: validation
+should_decontaminate: false
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_am.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4eaa02f59b217a8ce13f41fca67f9491aab917aa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_am.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: am_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_am_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_en.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..461e6f9e7516e2382380e391f3d2d713bf494ef9
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_en.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: en_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_en_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_ha.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..435ea73bc7639669ac53445f0ea9adf72edbd347
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_ha.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: ha_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_ha_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_nso.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..09112d5af3adaf7251a39032aabf60af73779088
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_nso.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: nso_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_nso_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_sw.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..264770eeda75acdf9088ac24e24645a9e5638c25
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_sw.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: sw_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_sw_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_yo.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10af53de81d71c82f02f1da80bdc9b5dc114bfed
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_yo.yaml
@@ -0,0 +1,6 @@
+# Generated by utils.py
+dataset_name: yo_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_yo_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_zu.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..239b1648a6e2c5d431a83ef0a94c16c6db90cc1b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/uhura-arc-easy_zu.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: zu_multiple_choice
+doc_to_text: "Question: {{question}}\n\nOptions:\n\n{% for i in range(choices['text']|length)\
+  \ %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_zu_prompt_4
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_4/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy
new file mode 100644
index 0000000000000000000000000000000000000000..3f5ac554027a87a6fe5eeda14887a46f5af5ef2f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy
@@ -0,0 +1,38 @@
+tag:
+    - uhura_arc_easy_tasks
+    - uhura_arc_easy_prompt_5
+dataset_path: masakhane/uhura-arc-easy
+dataset_name: null
+output_type: multiple_choice
+doc_to_target: "{{['A', 'B', 'C', 'D'].index(answerKey)}}"
+doc_to_choice:
+  - A
+  - B
+  - C
+  - D
+test_split: test
+fewshot_split: validation
+should_decontaminate: false
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
+metric_list:
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
+    # aggregation: mean
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+    ignore_case: true
+    ignore_punctuation: true
+    regexes_to_ignore:
+      - ","
+      - "\\$"
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_am.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_am.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f7f0231017eb5553893708f939b5fb23d0f60e1e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_am.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: am_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_am_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_en.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_en.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5aea6abac239580735b805c2582be3976a9986d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_en.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: en_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_en_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_ha.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_ha.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6293bda284e9ca224c3be7a65e1686f3c97210d8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_ha.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: ha_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_ha_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_nso.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_nso.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..80aff7064e48444477970baf5ccedf930a560a34
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_nso.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: nso_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_nso_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_sw.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_sw.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5bc7d2e5600b46a8660c83e76c69b9a24d0f398
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_sw.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: sw_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_sw_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_yo.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_yo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a267e987218945fc09217266defb4b0775fd777f
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_yo.yaml
@@ -0,0 +1,7 @@
+# Generated by utils.py
+dataset_name: yo_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+include: uhura-arc-easy
+task: uhura-arc-easy_yo_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_zu.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_zu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..69ce4a396af9f0bd6c96071319ef51ac3c1a81cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/uhura-arc-easy_zu.yaml
@@ -0,0 +1,8 @@
+# Generated by utils.py
+dataset_name: zu_multiple_choice
+doc_to_text: "Which of the following options answers this question: {{question}}\n\
+  \n{% for i in range(choices['text']|length) %}\t{{ 'ABCD'[i] }}: {{ choices['text'][i]\
+  \ }}\n{% endfor %}\nAnswer: "
+fewshot_split: train
+include: uhura-arc-easy
+task: uhura-arc-easy_zu_prompt_5
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/prompt_5/utils.py
@@ -0,0 +1 @@
+from lm_eval.utils import weighted_f1_score
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/uhura.yaml b/lm_eval/tasks/afrobench/uhura-arc-easy/uhura.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e2e2fea5fb49838103490f6f16321da45022cc7c
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/uhura.yaml
@@ -0,0 +1,13 @@
+group: uhura_arc_easy
+task:
+  - uhura_arc_easy_prompt_1
+  - uhura_arc_easy_prompt_2
+  - uhura_arc_easy_prompt_3
+  - uhura_arc_easy_prompt_4
+  - uhura_arc_easy_prompt_5
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 1
diff --git a/lm_eval/tasks/afrobench/uhura-arc-easy/utils.py b/lm_eval/tasks/afrobench/uhura-arc-easy/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1216618cbff12e6f4a21ff532d7da16abbef1bde
--- /dev/null
+++ b/lm_eval/tasks/afrobench/uhura-arc-easy/utils.py
@@ -0,0 +1,129 @@
+import argparse
+import os
+
+import pycountry
+import yaml
+
+
+def get_language_from_code(code: str) -> str:
+    """Return the pycountry language name whose `alpha_<len(code)>` field equals `code`."""
+    return pycountry.languages.get(**{f"alpha_{len(code)}": code}).name
+
+
+def prompt_func(mode):
+    """
+    Return the Jinja `doc_to_text` template registered under `mode`.
+
+    All five templates end with the same lettered option list and "Answer: "
+    cue; they differ only in the instruction/question lead-in.
+
+    :param mode: One of "prompt_1" ... "prompt_5".
+    :return: The template string for that prompt.
+    :raises KeyError: If `mode` is not a known prompt name.
+    """
+    # Fragments shared between templates.
+    question = "Question: {{question}}\n\n"
+    answer_block = (
+        "{% for i in range(choices['text']|length) %}"
+        "\t{{ 'ABCD'[i] }}: {{ choices['text'][i] }}\n"
+        "{% endfor %}\n"
+        "Answer: "
+    )
+
+    # Lead-in text that precedes the option list for each prompt.
+    intros = {
+        "prompt_1": "You are a virtual assistant that answers multiple-choice questions with the correct option only.\n\n"
+        + question
+        + "Choices:\n\n",
+        "prompt_2": "Choose the correct option that answers the question below:\n\n"
+        + question
+        + "Choices:\n\n",
+        "prompt_3": "Answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.\n\n"
+        + question
+        + "Options:\n\n",
+        "prompt_4": question + "Options:\n\n",
+        "prompt_5": "Which of the following options answers this question: {{question}}\n\n",
+    }
+
+    return intros[mode] + answer_block
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt name (e.g. "prompt_1"); selects the template and output sub-directory.
+    :raises FileExistsError: If `overwrite` is false and any target file exists.
+    """
+    err = []
+    # A tuple (not a set) keeps generation and error-report order deterministic.
+    languages = ("am", "en", "ha", "nso", "sw", "yo", "zu")
+
+    # The prompt and target directory depend only on `mode`; resolve them once.
+    doc_to_text = prompt_func(mode)
+    file_path = os.path.join(output_dir, mode)
+    os.makedirs(file_path, exist_ok=True)
+
+    for lang in languages:
+        file_name = f"uhura-arc-easy_{lang}.yaml"
+        yaml_details = {
+            "include": "uhura-arc-easy",
+            "task": f"uhura-arc-easy_{lang}_{mode}",
+            "dataset_name": f"{lang}_multiple_choice{'_unmatched' if lang == 'nso' else ''}",
+            "doc_to_text": doc_to_text,
+        }
+        if lang in ("nso", "zu"):
+            yaml_details["fewshot_split"] = "train"
+
+        try:
+            # Mode "x" raises FileExistsError instead of clobbering a file.
+            with open(
+                os.path.join(file_path, file_name),
+                "w" if overwrite else "x",
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(yaml_details, f, allow_unicode=True)
+        except FileExistsError:
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """Parse CLI args and generate language-specific yaml files."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=False,  # opt-in: with default=True the store_true flag was a no-op
+        action="store_true",
+        help="Overwrite files if they already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+
+    PROMPT_CHOICES = ["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"]
+    parser.add_argument(
+        "--mode",
+        nargs="*",
+        default=PROMPT_CHOICES,
+        choices=PROMPT_CHOICES,
+        help="Prompt number(s)",
+    )
+    args = parser.parse_args()
+
+    for mode in args.mode:
+        gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/xlsum/README.md b/lm_eval/tasks/afrobench/xlsum/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d9a47076e564de38fb4a7eb2cbd1df8a3b0290d2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/README.md
@@ -0,0 +1,34 @@
+# XL-Sum
+
+## Paper
+Title: `XL-Sum: Large-Scale Multilingual Abstractive Summarization for 44 Languages`
+
+Paper Link: https://aclanthology.org/2021.findings-acl.413/
+
+## Abstract
+>Contemporary works on abstractive text summarization have focused primarily on high-resource languages like English, mostly due to the limited availability of datasets for low/mid-resource ones. In this work, we present XL-Sum, a comprehensive and diverse dataset comprising 1 million professionally annotated article-summary pairs from BBC, extracted using a set of carefully designed heuristics. The dataset covers 44 languages ranging from low to high-resource, for many of which no public dataset is currently available. XL-Sum is highly abstractive, concise, and of high quality, as indicated by human and intrinsic evaluation. We fine-tune mT5, a state-of-the-art pretrained multilingual model, with XL-Sum and experiment on multilingual and low-resource summarization tasks. XL-Sum induces competitive results compared to the ones obtained using similar monolingual datasets: we show higher than 11 ROUGE-2 scores on 10 languages we benchmark on, with some of them exceeding 15, as obtained by multilingual training. Additionally, training on low-resource languages individually also provides competitive performance. To the best of our knowledge, XL-Sum is the largest abstractive summarization dataset in terms of the number of samples collected from a single source and the number of languages covered. We are releasing our dataset and models to encourage future research on multilingual abstractive summarization.
+
+HomePage: https://github.com/csebuetnlp/xl-sum
+
+### Citation
+
+```
+@inproceedings{hasan-etal-2021-xl,
+    title = "{XL}-Sum: Large-Scale Multilingual Abstractive Summarization for 44 Languages",
+    author = "Hasan, Tahmid  and
+      Bhattacharjee, Abhik  and
+      Islam, Md. Saiful  and
+      Mubasshir, Kazi  and
+      Li, Yuan-Fang  and
+      Kang, Yong-Bin  and
+      Rahman, M. Sohel  and
+      Shahriyar, Rifat",
+    booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
+    month = aug,
+    year = "2021",
+    address = "Online",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2021.findings-acl.413",
+    pages = "4693--4703",
+}
+```
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/utils.py b/lm_eval/tasks/afrobench/xlsum/prompt_1/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..85db4d4f4cef061e526c970ece194317e576de06
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/utils.py
@@ -0,0 +1,18 @@
+import evaluate
+
+
+def rougeL(items):
+    """
+    Passthrough: return `items` unchanged; scoring is deferred to `rougeL_agg`.
+    """
+    return items
+
+
+def rougeL_agg(items):
+    """
+    ROUGE-L computed over all (reference, prediction) pairs; higher is better.
+    """
+    columns = list(zip(*items))  # transpose the pairs once, then pick each column
+    refs, preds = columns[0], columns[1]
+    rouge_scorer = evaluate.load("rouge")
+    return rouge_scorer.compute(predictions=preds, references=refs)["rougeL"]
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum
new file mode 100644
index 0000000000000000000000000000000000000000..f6b0421edd9ba2b3f1c2eac1dbfaf6f51e5cfba5
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum
@@ -0,0 +1,22 @@
+tag:
+  - xlsum_tasks
+  - xlsum_prompt_1
+task: null
+dataset_path: csebuetnlp/xlsum
+dataset_name: null
+dataset_kwargs:
+  trust_remote_code: true
+output_type: generate_until
+generation_kwargs:
+  until:
+    - "</s>"
+validation_split: validation
+fewshot_split: validation
+test_split: test
+should_decontaminate: false
+metric_list:
+  - metric: !function utils.rougeL
+    higher_is_better: true
+    aggregation:  !function utils.rougeL_agg
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_amharic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_amharic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ab68805aa658c3c15d8367f48115f40e2581aac
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_amharic.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: amharic
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Amharic. Ensure that you
+  provide the summary in Amharic and nothing else.
+
+  Document in Amharic: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_amharic_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_arabic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_arabic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..af7df7d90f01b274c1d54076256d7e3a510627b4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_arabic.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: arabic
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Arabic. Ensure that you
+  provide the summary in Arabic and nothing else.
+
+  Document in Arabic: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_arabic_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_hausa.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_hausa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..37f6b3e518835365e7b59fb550c15e286c85f63a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_hausa.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: hausa
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Hausa. Ensure that you
+  provide the summary in Hausa and nothing else.
+
+  Document in Hausa: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_hausa_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_igbo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_igbo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04644b5d7bdd8595c5beb02240fe521162dcf3fd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_igbo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: igbo
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Igbo. Ensure that you provide
+  the summary in Igbo and nothing else.
+
+  Document in Igbo: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_igbo_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_kirundi.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_kirundi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c434296f5598cb995c40568ab69141f29571d57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_kirundi.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: kirundi
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Kirundi. Ensure that you
+  provide the summary in Kirundi and nothing else.
+
+  Document in Kirundi: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_kirundi_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_oromo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_oromo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78fb14eca4344c17ed3300954193764568be40d4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_oromo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: oromo
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Oromo. Ensure that you
+  provide the summary in Oromo and nothing else.
+
+  Document in Oromo: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_oromo_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_pidgin.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_pidgin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..68f2c17f560ee888ea1ee958c9ba2392d6f47dfc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_pidgin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: pidgin
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Nigerian pidgin. Ensure
+  that you provide the summary in Nigerian pidgin and nothing else.
+
+  Document in Nigerian pidgin: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_pidgin_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_somali.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_somali.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d699dc1905796945e89f3659060202f7314ed776
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_somali.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: somali
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Somali. Ensure that you
+  provide the summary in Somali and nothing else.
+
+  Document in Somali: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_somali_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_swahili.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_swahili.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a951c11b8c7ee59f1dfcd3eb44eaada2bd0652a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_swahili.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: swahili
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Swahili. Ensure that you
+  provide the summary in Swahili and nothing else.
+
+  Document in Swahili: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_swahili_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_telugu.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_telugu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..82a60171a5e2a42e1eb5d43aaf8a034e77b4a798
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_telugu.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: telugu
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Telugu. Ensure that you
+  provide the summary in Telugu and nothing else.
+
+  Document in Telugu: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_telugu_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_tigrinya.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_tigrinya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..31630982a134b934311c00164c42dc9fabf22cc7
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_tigrinya.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: tigrinya
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Tigrinya. Ensure that you
+  provide the summary in Tigrinya and nothing else.
+
+  Document in Tigrinya: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_tigrinya_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_yoruba.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_yoruba.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c14a9113e293c057d028e27cd09ed1f6812c1e8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_1/xlsum_yoruba.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yoruba
+doc_to_target: '{{summary}}'
+doc_to_text: "Provide a summary of the document written in Yoruba. Ensure that you
+  provide the summary in Yoruba and nothing else.
+
+  Document in Yoruba: {{text}}\nSummary: "
+include: xlsum
+task: xlsum_yoruba_prompt_1
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/utils.py b/lm_eval/tasks/afrobench/xlsum/prompt_2/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..85db4d4f4cef061e526c970ece194317e576de06
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/utils.py
@@ -0,0 +1,18 @@
+import evaluate
+
+
+def rougeL(items):
+    """
+    Passthrough: return `items` unchanged; scoring is deferred to `rougeL_agg`.
+    """
+    return items
+
+
+def rougeL_agg(items):
+    """
+    ROUGE-L computed over all (reference, prediction) pairs; higher is better.
+    """
+    columns = list(zip(*items))  # transpose the pairs once, then pick each column
+    refs, preds = columns[0], columns[1]
+    rouge_scorer = evaluate.load("rouge")
+    return rouge_scorer.compute(predictions=preds, references=refs)["rougeL"]
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum
new file mode 100644
index 0000000000000000000000000000000000000000..e572c00c6ae1c0f8f84f1030c5903325ca1f0ae4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum
@@ -0,0 +1,22 @@
+tag:
+  - xlsum_tasks
+  - xlsum_prompt_2
+task: null
+dataset_path: csebuetnlp/xlsum
+dataset_name: null
+dataset_kwargs:
+  trust_remote_code: true
+output_type: generate_until
+generation_kwargs:
+  until:
+    - "</s>"
+validation_split: validation
+fewshot_split: validation
+test_split: test
+should_decontaminate: false
+metric_list:
+  - metric: !function utils.rougeL
+    higher_is_better: true
+    aggregation:  !function utils.rougeL_agg
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_amharic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_amharic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f2275c657b54df708f62526fdc12b0381f197eb
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_amharic.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: amharic
+doc_to_target: '{{summary}}'
+doc_to_text: "Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n"
+include: xlsum
+task: xlsum_amharic_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_arabic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_arabic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4f772c31610175417ee97105ae4d99f526f0c41
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_arabic.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: arabic
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_arabic_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_hausa.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_hausa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7485672cb48c86d1df444391064b04754836fa77
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_hausa.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: hausa
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_hausa_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_igbo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_igbo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2cf7fafe394e049aaa3382068d6da8cac70cf705
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_igbo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: igbo
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_igbo_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_kirundi.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_kirundi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63021d7d4f9ff0d252c82665fd8262bd6bb5c327
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_kirundi.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: kirundi
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_kirundi_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_oromo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_oromo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b637b10d0428d614fcd4c06bdb1fb2383057ef77
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_oromo.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: oromo
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_oromo_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_pidgin.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_pidgin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c13d93d5c1ff95ac76c1b87f4c301c97a771f52
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_pidgin.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: pidgin
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_pidgin_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_somali.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_somali.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b7245ddc193a133701fd8f71cd6b52cd34899594
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_somali.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: somali
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_somali_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_swahili.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_swahili.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..65f176fba40e37003a9cfd8813957760cdac2aa1
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_swahili.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: swahili
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_swahili_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_telugu.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_telugu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ecbdde5c90806b2684fce1373c3fad94ef5c65e
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_telugu.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: telugu
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_telugu_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_tigrinya.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_tigrinya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d46e2fb573f5cdd3bb9459c7eb5b95150cae5ec8
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_tigrinya.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: tigrinya
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_tigrinya_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_yoruba.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_yoruba.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ea0ef503444a8237ce5fd693f6ebec082a8a6cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_2/xlsum_yoruba.yaml
@@ -0,0 +1,9 @@
+# Generated by utils.py
+dataset_name: yoruba
+doc_to_target: '{{summary}}'
+doc_to_text: 'Summarize the document below in triple backticks and return only the
+  summary and nothing else.
+
+  ```{{text}}```\n'
+include: xlsum
+task: xlsum_yoruba_prompt_2
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/utils.py b/lm_eval/tasks/afrobench/xlsum/prompt_3/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..85db4d4f4cef061e526c970ece194317e576de06
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/utils.py
@@ -0,0 +1,18 @@
+import evaluate
+
+
+def rougeL(items):
+    """
+    Return ``items`` unchanged (passthrough for efficiency).
+
+    No score is computed here; the ROUGE-L computation itself lives in
+    ``rougeL_agg`` in this module.
+    """
+    return items
+
+
+def rougeL_agg(items):
+    """
+    Compute a single ROUGE-L score over all collected items. Higher is better.
+
+    :param items: Iterable of tuples; the first element of each tuple is passed to
+        the scorer as the reference and the second as the prediction.
+    :return: The ``"rougeL"`` entry of the scorer's result.
+    :raises IndexError: If ``items`` is empty (there are no columns to select).
+    """
+    # Transpose once and index both columns, so the input is walked a single time
+    # and a one-shot iterable is not already exhausted when predictions are read.
+    columns = list(zip(*items))
+    refs, preds = columns[0], columns[1]
+    rouge_scorer = evaluate.load("rouge")
+    return rouge_scorer.compute(predictions=preds, references=refs)["rougeL"]
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum
new file mode 100644
index 0000000000000000000000000000000000000000..08842ef8eb627dfb12387ae7ef2e232d2f3c40d3
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum
@@ -0,0 +1,22 @@
+# Shared base config for the XL-Sum "prompt_3" tasks. The per-language files in this
+# directory pull it in with `include: xlsum` and provide `task`, `dataset_name`,
+# `doc_to_text` and `doc_to_target` themselves.
+tag:
+  - xlsum_tasks
+  - xlsum_prompt_3
+# Placeholder; every including file sets its own task name.
+task: null
+dataset_path: csebuetnlp/xlsum
+# Placeholder; every including file selects its own language subset.
+dataset_name: null
+dataset_kwargs:
+  # NOTE(review): presumably needed because the dataset ships a loading script —
+  # confirm it is still required.
+  trust_remote_code: true
+output_type: generate_until
+generation_kwargs:
+  until:
+    - "</s>"
+validation_split: validation
+fewshot_split: validation
+test_split: test
+should_decontaminate: false
+# Both the per-sample metric and its aggregation are Python callables taken from
+# the utils.py in this directory.
+metric_list:
+  - metric: !function utils.rougeL
+    higher_is_better: true
+    aggregation:  !function utils.rougeL_agg
+metadata:
+  version: 1.0
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_amharic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_amharic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6fc85e7ceb43f474754081088a77b3979b785334
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_amharic.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: amharic
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Amharic. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_amharic_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_arabic.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_arabic.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4f2b1f5c09bcf2dc76fb4807e09a1cc52b6ce82
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_arabic.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: arabic
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Arabic. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_arabic_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_hausa.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_hausa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1a0603749b196a9cda3f995fb16ea2814513142
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_hausa.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: hausa
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Hausa. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_hausa_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_igbo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_igbo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b23f8f395679740acd523757311a7803407c3cd
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_igbo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: igbo
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Igbo. Your main goal is to ensure summaries are concise and
+  informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_igbo_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_kirundi.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_kirundi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8f40b2a7ff68847a5bae0649451460d22f24ae2a
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_kirundi.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: kirundi
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Kirundi. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_kirundi_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_oromo.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_oromo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bbc912851b05e78829a2835e14e28831469d302d
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_oromo.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: oromo
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Oromo. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_oromo_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_pidgin.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_pidgin.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8149e441e9869a94312fe54001cd93fd6d720eaa
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_pidgin.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: pidgin
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Nigerian Pidgin. Your main goal is to ensure summaries are
+  concise and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_pidgin_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_somali.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_somali.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a2936da11bdca6fb8fd9ba6f288968ee0c1843a4
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_somali.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: somali
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Somali. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_somali_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_swahili.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_swahili.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f90f4cfaa99d4291d36e9e9aec715e32925cb55
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_swahili.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: swahili
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Swahili. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_swahili_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_telugu.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_telugu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67d116dce2105d9121c5f13e268333005d9a91dc
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_telugu.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: telugu
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Telugu. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_telugu_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_tigrinya.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_tigrinya.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5b20d6e3bfdb66590f7004ff0f187e2a6537db84
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_tigrinya.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: tigrinya
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Tigrinya. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_tigrinya_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_yoruba.yaml b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_yoruba.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..353be14cda6713964f73436b2085d6ae63fcdc57
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/prompt_3/xlsum_yoruba.yaml
@@ -0,0 +1,10 @@
+# Generated by utils.py
+dataset_name: yoruba
+doc_to_target: '{{summary}}'
+doc_to_text: 'You are an advanced Summarizer, a specialized assistant designed to
+  summarize documents in Yoruba. Your main goal is to ensure summaries are concise
+  and informative. Ensure you return the summary only and nothing else.
+
+  Document: {{text}}\nSummary: '
+include: xlsum
+task: xlsum_yoruba_prompt_3
diff --git a/lm_eval/tasks/afrobench/xlsum/utils.py b/lm_eval/tasks/afrobench/xlsum/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8df1e12e8b4aa683bd71c2fb23d90ff7667de5b2
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/utils.py
@@ -0,0 +1,118 @@
+import argparse
+import os
+
+import yaml
+
+
+def prompt_func(mode, lang):
+    """
+    Build the ``doc_to_text`` prompt template for one prompt variant and language.
+
+    :param mode: Prompt key, ``"prompt_1"`` through ``"prompt_5"``.
+    :param lang: Lower-case language name (e.g. ``"hausa"``); ``"pidgin"`` is
+        spelled out as "Nigerian Pidgin" in the prompt text.
+    :return: The prompt string, containing a ``{{text}}`` template variable at the
+        position where the document belongs.
+    :raises KeyError: If ``mode`` is not one of the known prompt keys.
+    """
+    # str.capitalize() lower-cases everything after the first character, which would
+    # turn the two-word name into "Nigerian pidgin"; handle it separately.
+    language = "Nigerian Pidgin" if lang == "pidgin" else lang.capitalize()
+
+    # Unquoted on purpose: {{text}} is a Jinja-style variable reference, whereas the
+    # quoted form {{'text'}} is a string literal that renders as the bare word "text"
+    # instead of the document.
+    doc = "{{text}}"
+    # NOTE(review): this is a literal backslash followed by "n", not a line break,
+    # and it ends up in the generated YAML as such. Kept unchanged here; confirm
+    # whether a real newline was intended.
+    sep = r"\n"
+
+    prompt_map = {
+        "prompt_1": (
+            f"Provide a summary of the document written in {language}. "
+            f"Ensure that you provide the summary in {language} and nothing else.\n"
+            f"Document in {language}: {doc}{sep}"
+            "Summary: "
+        ),
+        "prompt_2": (
+            "Summarize the document below in triple backticks and return only the "
+            "summary and nothing else.\n"
+            f"```{doc}```{sep}"
+        ),
+        "prompt_3": (
+            "You are an advanced Summarizer, a specialized assistant designed to "
+            f"summarize documents in {language}. "
+            "Your main goal is to ensure summaries are concise and informative. "
+            "Ensure you return the summary only and nothing else.\n"
+            f"Document: {doc}{sep}"
+            "Summary: "
+        ),
+        "prompt_4": f"Summarize this {language} document:\n{doc}{sep}Summary: ",
+        "prompt_5": f"{language} document: {doc}{sep}Summary: ",
+    }
+    return prompt_map[mode]
+
+
+def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
+    """
+    Generate a yaml file for each language.
+
+    One ``xlsum_<lang>.yaml`` is written per language into ``<output_dir>/<mode>/``;
+    that directory is created when it does not exist yet.
+
+    :param output_dir: The directory to output the files to.
+    :param overwrite: Whether to overwrite files if they already exist.
+    :param mode: Prompt variant (e.g. ``prompt_1``); selects the prompt text, the
+        task-name suffix and the sub-directory the files are written to.
+    :raises FileExistsError: If ``overwrite`` is false and at least one target file
+        already exists. All languages are attempted before the error is raised.
+    """
+    XLSUM_LANGUAGES = (
+        "amharic",
+        "arabic",
+        "hausa",
+        "igbo",
+        "kirundi",
+        "oromo",
+        "pidgin",
+        "somali",
+        "swahili",
+        "telugu",
+        "tigrinya",
+        "yoruba",
+    )
+
+    # Derive the target directory once and reuse it for every file, so the directory
+    # that gets created and the path that gets opened cannot drift apart.
+    target_dir = os.path.join(output_dir, mode)
+    os.makedirs(target_dir, exist_ok=True)
+    # "x" is exclusive creation: opening an existing file raises FileExistsError.
+    open_mode = "w" if overwrite else "x"
+
+    err = []
+    for lang in XLSUM_LANGUAGES:
+        file_name = f"xlsum_{lang}.yaml"
+        yaml_details = {
+            "include": "xlsum",
+            "task": f"xlsum_{lang}_{mode}",
+            "dataset_name": lang,
+            "doc_to_text": prompt_func(mode, lang),
+            "doc_to_target": "{{summary}}",
+        }
+        try:
+            with open(
+                os.path.join(target_dir, file_name),
+                open_mode,
+                encoding="utf8",
+            ) as f:
+                f.write("# Generated by utils.py\n")
+                yaml.dump(
+                    yaml_details,
+                    f,
+                    allow_unicode=True,
+                )
+        except FileExistsError:
+            # Keep going so that a single error can list every clashing file.
+            err.append(file_name)
+
+    if err:
+        raise FileExistsError(
+            "Files were not created because they already exist (use --overwrite flag):"
+            f" {', '.join(err)}"
+        )
+
+
+def main() -> None:
+    """
+    Parse CLI args and generate language-specific yaml files.
+
+    Runs ``gen_lang_yamls`` once for every prompt variant selected with ``--mode``
+    (all variants when the option is omitted).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--overwrite",
+        default=True,
+        action="store_true",
+        help="Overwrite files if they already exist (default)",
+    )
+    # `--overwrite` is a store_true flag whose default is already True, so by itself
+    # it can never be switched off. This companion flag writes False to the same
+    # destination and makes the non-overwriting mode reachable from the CLI.
+    parser.add_argument(
+        "--no-overwrite",
+        dest="overwrite",
+        action="store_false",
+        help="Fail instead of overwriting files that already exist",
+    )
+    parser.add_argument(
+        "--output-dir",
+        default="./",
+        help="Directory to write yaml files to",
+    )
+
+    PROMPT_CHOICES = ["prompt_1", "prompt_2", "prompt_3", "prompt_4", "prompt_5"]
+    parser.add_argument(
+        "--mode",
+        nargs="*",
+        default=PROMPT_CHOICES,
+        choices=PROMPT_CHOICES,
+        help="Prompt number(s)",
+    )
+    args = parser.parse_args()
+
+    for mode in args.mode:
+        gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=mode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lm_eval/tasks/afrobench/xlsum/xlsum.yaml b/lm_eval/tasks/afrobench/xlsum/xlsum.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8d87717597c59eb333f712d69eb854e971146915
--- /dev/null
+++ b/lm_eval/tasks/afrobench/xlsum/xlsum.yaml
@@ -0,0 +1,11 @@
+# Group config that bundles the XL-Sum prompt variants listed under `task`.
+# NOTE(review): "xlum" looks like a typo for "xlsum" — confirm nothing references
+# the current name before renaming.
+group: xlum
+task:
+  - xlsum_prompt_1
+  - xlsum_prompt_2
+  - xlsum_prompt_3
+# NOTE(review): the task configs visible alongside this file declare their metric
+# via `utils.rougeL`, not `acc` — verify this aggregate matches a metric the
+# member tasks actually report.
+aggregate_metric_list:
+  - metric: acc
+    aggregation: mean
+    weight_by_size: true
+metadata:
+  version: 2