merged main

90ad5db7 · lintangsutawika · f692caa9 · b177c82c · f692caa9 · 90ad5db7
Commit 90ad5db7 authored Mar 01, 2024 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_cot_native_zh.yaml
-# Generated by utils.py
-dataset_name: zh
-doc_to_target: '{% if answer is not none %}{{answer[5+1]}}{% else %}{{answer_number|string}}{%
-  endif %}'
-doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{%
-  endif %}'
-include: cot_yaml
-task: mgsm_zh_native_cot
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_bn.yaml
+# Generated by utils.py
+dataset_name: bn
+doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\nধাপে ধাপে উত্তর:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: The answer is (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'প্রশ্ন:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_bn
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_de.yaml
+# Generated by utils.py
+dataset_name: de
+doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\nSchritt-für-Schritt-Antwort:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: Die Antwort lautet (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Frage:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_de
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_en.yaml
+# Generated by utils.py
+dataset_name: en
+doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: The answer is (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_en
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_es.yaml
+# Generated by utils.py
+dataset_name: es
+doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta paso a paso:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: La respuesta es (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Pregunta:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_es
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_fr.yaml
+# Generated by utils.py
+dataset_name: fr
+doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\nRéponse étape par étape :"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: La réponse est (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Question :'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_fr
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ja.yaml
+# Generated by utils.py
+dataset_name: ja
+doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: 答えは(\-?[0-9\.\,]+)です。
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - '問題:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_ja
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_ru.yaml
+# Generated by utils.py
+dataset_name: ru
+doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: Ответ — (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Задача:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_ru
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_sw.yaml
+# Generated by utils.py
+dataset_name: sw
+doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: Jibu ni (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'Swali:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_sw
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_te.yaml
+# Generated by utils.py
+dataset_name: te
+doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: సమాధానం (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'ప్రశ్న:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_te
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_th.yaml
+# Generated by utils.py
+dataset_name: th
+doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: คำตอบคือ (\-?[0-9\.\,]+)
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - 'โจทย์:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_th
--- a/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
+++ b/lm_eval/tasks/mgsm/native_cot/mgsm_native_cot_zh.yaml
+# Generated by utils.py
+dataset_name: zh
+doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
+doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
+filter_list:
+- filter:
+  - function: regex
+    regex_pattern: 答案是 (\-?[0-9\.\,]+)。
+  - function: take_first
+  name: strict-match
+- filter:
+  - function: regex
+    group_select: -1
+    regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+  - function: take_first
+  name: flexible-extract
+generation_kwargs:
+  do_sample: false
+  until:
+  - '问题:'
+  - </s>
+  - <|im_end|>
+include: cot_yaml
+task: mgsm_native_cot_zh
--- a/lm_eval/tasks/mgsm/utils.py
+++ b/lm_eval/tasks/mgsm/utils.py
-import yaml
 import argparse

+import yaml
+

 LANGUAGES = {
    "bn": {  # Bengali
@@ -99,11 +100,24 @@ def add_regex_pattern(regex_pattern):
    return {
        "filter_list": [
            {
-                "name": "get-answer",
+                "name": "strict-match",
+                "filter": [
+                    {
+                        "function": "regex",
+                        "regex_pattern": f"""{regex_pattern}""",
+                    },
+                    {
+                        "function": "take_first",
+                    },
+                ],
+            },
+            {
+                "name": "flexible-extract",
                "filter": [
                    {
                        "function": "regex",
-                        "regex_pattern": regex_pattern,
+                        "regex_pattern": """(-?[$0-9.,]{2,})|(-?[0-9]+)""",
+                        "group_select": -1,
                    },
                    {
                        "function": "take_first",
@@ -128,23 +142,25 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:

            yaml_template = "cot_yaml"
            filter_list = {}
+            DELIMITER = None
            if mode == "direct":
                ANSWER = LANGUAGES[lang]["DIRECT"]
                REGEX = None
-                task_name = f"mgsm_{lang}_direct"
+                task_name = f"mgsm_direct_{lang}"
                yaml_template = "direct_yaml"
            elif mode == "native-cot":
                ANSWER = LANGUAGES[lang]["ANSWER"]
                REGEX = LANGUAGES[lang]["REGEX"]
-                task_name = f"mgsm_{lang}_native-cot"
+                task_name = f"mgsm_native_cot_{lang}"
                filter_list = add_regex_pattern(REGEX)
+                DELIMITER = "" if lang in ["zh", "ja"] else None
            elif mode == "en-cot":
                ANSWER = LANGUAGES["en"]["ANSWER"]
                REGEX = LANGUAGES["en"]["REGEX"]
-                task_name = f"mgsm_{lang}_en-cot"
+                task_name = f"mgsm_en_cot_{lang}"

            file_name = f"{task_name}.yaml"
-
+            ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
            with open(
                f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
            ) as f:
@@ -153,18 +169,23 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                    {
                        "include": yaml_template,
                        "dataset_name": lang,
-                        "task": f"mgsm_{lang}_direct",
+                        "task": f"{task_name}",
                        "doc_to_text": f"""{{% if answer is not none %}}"""
                        f"""{{{{question+"\\n{ANSWER}"}}}}"""
                        f"""{{% else %}}"""
                        f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
                        f"""{{% endif %}}""",
                        "doc_to_target": f"""{{% if answer is not none %}}"""
-                        f"""{{{{answer[{len(ANSWER)}+1]}}}}"""
+                        f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
                        f"""{{% else %}}"""
                        f"""{{{{answer_number|string}}}}"""
                        f"""{{% endif %}}""",
                        **filter_list,
+                        "generation_kwargs": {
+                            "until": [QUESTION, "</s>", "<|im_end|>"],
+                            "do_sample": False,
+                        },
+                        **({"target_delimiter": DELIMITER} if DELIMITER else {}),
                    },
                    f,
                    allow_unicode=True,

--- a/lm_eval/tasks/minerva_math/utils.py
+++ b/lm_eval/tasks/minerva_math/utils.py
-import datasets
 import re
 import signal
+from typing import Dict, List, Optional
+
+import datasets
+
 from lm_eval.utils import eval_logger
-from typing import Optional, List, Dict
+

 try:
    import sympy
    from sympy.parsing.latex import parse_latex
 except ModuleNotFoundError:
-    raise Exception(
+    raise ModuleNotFoundError(
        "`sympy` is required for generating translation task prompt templates. \
 please install sympy via pip install lm-eval[math] or pip install -e .[math]",
    )

--- a/lm_eval/tasks/mmlu/_generate_configs.py
+++ b/lm_eval/tasks/mmlu/_generate_configs.py
 """
 Take in a YAML, and output all "other" splits with this YAML
 """
-import os
-import yaml
 import argparse
+import os

+import yaml
 from tqdm import tqdm

 from lm_eval.logger import eval_logger

+
 SUBJECTS = {
    "abstract_algebra": "stem",
    "anatomy": "stem",
@@ -124,7 +125,6 @@ if __name__ == "__main__":
            yaml.dump(
                yaml_dict,
                yaml_file,
-                # width=float("inf"),
                allow_unicode=True,
                default_style='"',
            )

--- a/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
+++ b/lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
 import re
 import sys
-
 import unicodedata

 from lm_eval.filters.extraction import RegexFilter
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
    """ """

    def __init__(
-            self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", group_select=0, fallback: str = "[invalid]",
-            ignore_case=False, ignore_punctuation=False, regexes_to_ignore=None,
+        self,
+        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
+        group_select=0,
+        fallback: str = "[invalid]",
+        ignore_case=False,
+        ignore_punctuation=False,
+        regexes_to_ignore=None,
    ) -> None:
        """
        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
                    match = convert_dict[match]
            return match

-        punct_tbl = dict.fromkeys(i for i in range(sys.maxunicode)
-                                  if unicodedata.category(chr(i)).startswith('P'))
+        punct_tbl = dict.fromkeys(
+            i
+            for i in range(sys.maxunicode)
+            if unicodedata.category(chr(i)).startswith("P")
+        )

        def filter_ignores(st):
            if self.regexes_to_ignore is not None:
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
        for r, doc in zip(resps, docs):
            fallback_regexes = []
            choice_to_alpha = {}
-            next_alpha = 'A'
+            next_alpha = "A"

            without_paren_fallback_regexes = []
            without_paren_to_target = {}

-            choices = doc['choices']
+            choices = doc["choices"]
            for c in choices:
                m = filter_ignores(c.strip())
                fallback_regexes.append(f"{re.escape(m)}")
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
                without_paren_to_target[next_alpha] = f"({next_alpha})"

                next_alpha = chr(ord(next_alpha) + 1)
-            fallback_regex = re.compile('|'.join(fallback_regexes))
-            without_paren_fallback_regex = '|'.join(without_paren_fallback_regexes)
-            without_paren_fallback_regex = re.compile(f":[\s]*({without_paren_fallback_regex})")
+            fallback_regex = re.compile("|".join(fallback_regexes))
+            without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
+            without_paren_fallback_regex = re.compile(
+                f":[\s]*({without_paren_fallback_regex})"
+            )

            filtered = []
            for resp in r:
                match = find_match(self.regex, resp)
                if not match:
-                    match = find_match(fallback_regex, filter_ignores(resp), choice_to_alpha)
+                    match = find_match(
+                        fallback_regex, filter_ignores(resp), choice_to_alpha
+                    )
                    if not match:
-                        match = find_match(without_paren_fallback_regex, resp, without_paren_to_target)
+                        match = find_match(
+                            without_paren_fallback_regex, resp, without_paren_to_target
+                        )
                if not match:
                    match = self.fallback
                filtered.append(match)

--- a/lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
+++ b/lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
 import re
 import sys
-
 import unicodedata

 from lm_eval.filters.extraction import RegexFilter
@@ -10,8 +9,13 @@ class MultiChoiceRegexFilter(RegexFilter):
    """ """

    def __init__(
-            self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", group_select=0, fallback: str = "[invalid]",
-            ignore_case=False, ignore_punctuation=False, regexes_to_ignore=None,
+        self,
+        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
+        group_select=0,
+        fallback: str = "[invalid]",
+        ignore_case=False,
+        ignore_punctuation=False,
+        regexes_to_ignore=None,
    ) -> None:
        """
        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
@@ -44,8 +48,11 @@ class MultiChoiceRegexFilter(RegexFilter):
                    match = convert_dict[match]
            return match

-        punct_tbl = dict.fromkeys(i for i in range(sys.maxunicode)
-                                  if unicodedata.category(chr(i)).startswith('P'))
+        punct_tbl = dict.fromkeys(
+            i
+            for i in range(sys.maxunicode)
+            if unicodedata.category(chr(i)).startswith("P")
+        )

        def filter_ignores(st):
            if self.regexes_to_ignore is not None:
@@ -65,12 +72,12 @@ class MultiChoiceRegexFilter(RegexFilter):
        for r, doc in zip(resps, docs):
            fallback_regexes = []
            choice_to_alpha = {}
-            next_alpha = 'A'
+            next_alpha = "A"

            without_paren_fallback_regexes = []
            without_paren_to_target = {}

-            choices = doc['choices']
+            choices = doc["choices"]
            for c in choices:
                m = filter_ignores(c.strip())
                fallback_regexes.append(f"{re.escape(m)}")
@@ -80,17 +87,23 @@ class MultiChoiceRegexFilter(RegexFilter):
                without_paren_to_target[next_alpha] = f"({next_alpha})"

                next_alpha = chr(ord(next_alpha) + 1)
-            fallback_regex = re.compile('|'.join(fallback_regexes))
-            without_paren_fallback_regex = '|'.join(without_paren_fallback_regexes)
-            without_paren_fallback_regex = re.compile(f":[\s]*({without_paren_fallback_regex})")
+            fallback_regex = re.compile("|".join(fallback_regexes))
+            without_paren_fallback_regex = "|".join(without_paren_fallback_regexes)
+            without_paren_fallback_regex = re.compile(
+                f":[\s]*({without_paren_fallback_regex})"
+            )

            filtered = []
            for resp in r:
                match = find_match(self.regex, resp)
                if not match:
-                    match = find_match(fallback_regex, filter_ignores(resp), choice_to_alpha)
+                    match = find_match(
+                        fallback_regex, filter_ignores(resp), choice_to_alpha
+                    )
                    if not match:
-                        match = find_match(without_paren_fallback_regex, resp, without_paren_to_target)
+                        match = find_match(
+                            without_paren_fallback_regex, resp, without_paren_to_target
+                        )
                if not match:
                    match = self.fallback
                filtered.append(match)

--- a/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
+++ b/lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
-import yaml
 import datasets
-
+import yaml
 from tqdm import tqdm



--- a/lm_eval/tasks/model_written_evals/persona/_generate_configs.py
+++ b/lm_eval/tasks/model_written_evals/persona/_generate_configs.py
-import yaml
 import datasets
-
+import yaml
 from tqdm import tqdm



--- a/lm_eval/tasks/okapi/arc_multilingual/utils.py
+++ b/lm_eval/tasks/okapi/arc_multilingual/utils.py
-import datasets
 import re

+import datasets
+

 def preprocess(text):
    if text is None:
@@ -18,7 +19,13 @@ def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
        out_doc = {
            "id": doc["id"],
            "query": "Question: " + preprocess(doc["instruction"]) + "\nAnswer:",
-            "choices": [preprocess(doc['option_a']), preprocess(doc['option_b']), preprocess(doc['option_c']), preprocess(doc['option_d']), preprocess(doc['option_e'])],
+            "choices": [
+                preprocess(doc["option_a"]),
+                preprocess(doc["option_b"]),
+                preprocess(doc["option_c"]),
+                preprocess(doc["option_d"]),
+                preprocess(doc["option_e"]),
+            ],
            "gold": ["A", "B", "C", "D", "E"].index(doc["answer"]),
        }
        return out_doc