gaoqiong / lm-evaluation-harness / Commits

Commit f66fc06f, authored Feb 01, 2024 by haileyschoelkopf

    fix merge conflicts

Parents: b13753cd, d714fc95
Changes: 84

Showing 20 changed files with 22 additions and 160 deletions (+22 / -160)
Files in this page of the diff:

lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml                  +0   -29
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml                   +0   -23
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml                 +0   -33
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml                   +0   -29
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml            +0   -21
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml                       +0   -6
lm_eval/tasks/bigbench/generate_tasks.py                                  +1   -1
lm_eval/tasks/blimp/generate_configs.py                                   +1   -1
lm_eval/tasks/ceval/_generate_configs.py                                  +3   -3
lm_eval/tasks/cmmlu/_generate_configs.py                                  +3   -3
lm_eval/tasks/code_x_glue/code-text/bleu.py                               +1   -1
lm_eval/tasks/csatqa/_generate_configs.py                                 +2   -2
lm_eval/tasks/gsm8k/gsm8k-cot.yaml                                        +1   -0
lm_eval/tasks/gsm8k/gsm8k.yaml                                            +1   -0
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml                      +1   -0
lm_eval/tasks/mmlu/_generate_configs.py                                   +4   -4
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py   +1   -1
lm_eval/tasks/model_written_evals/persona/_generate_configs.py            +1   -1
lm_eval/tasks/qasper/bool.yaml                                            +1   -1
lm_eval/tasks/qasper/freeform.yaml                                        +1   -1
lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-7":
    doc_to_text: "Can we draw the following hypothesis from the context (see options)?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
  "template-8":
    doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
    doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
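For orientation (not part of this commit): these prompt templates are Jinja2 strings rendered against each dataset document's fields (premise, hypothesis, label). A minimal sketch of how "template-2" above would render, assuming the jinja2 package and a hand-made ANLI-style document:

# Illustrative sketch only, not part of this diff: render one Flan ANLI template.
from jinja2 import Template

doc = {
    "premise": "The cat sat on the mat.",
    "hypothesis": "An animal is on the mat.",
    "label": 0,  # ANLI labels: 0 = entailment, 1 = neutral, 2 = contradiction
}

doc_to_text = Template(
    "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\n"
    "OPTIONS:\n- Yes\n- It's impossible to say\n- No"
)
doc_to_target = Template("{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}")

print(doc_to_text.render(**doc))    # the prompt shown to the model
print(doc_to_target.render(**doc))  # -> "Yes"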
lm_eval/tasks/benchmarks/flan/prompt_templates/arc.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-1":
    doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-2":
    doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-3":
    doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-4":
    doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-5":
    doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
  "template-6":
    doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
    doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/boolq.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-1":
    doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-2":
    doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-3":
    doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-4":
    doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-5":
    doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-6":
    doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-7":
    doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-8":
    # doc_to_text: "{{title}}\n\n{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
  "template-9":
    doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
    doc_to_target: "{{['no', 'yes'][label]}}"
lm_eval/tasks/benchmarks/flan/prompt_templates/rte.yaml    deleted 100644 → 0    (view file @ b13753cd)

# Flan Prompt Templates
prompts:
  "template-0":
    doc_to_text: "{{premise}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-1":
    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-2":
    doc_to_text: "{{premise}}\n\nQ with options: Can we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-3":
    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-4":
    doc_to_text: "{{premise}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{hypothesis}}"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-5":
    doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nThe answer is"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-6":
    doc_to_text: "Read the text and determine if the sentence is true:\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-7":
    doc_to_text: "Question with options: can we draw the following hypothesis from the context?\n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- yes\n- no\nA:"
    doc_to_target: "{{['yes', 'no'][label]}}"
  "template-8":
    doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- yes\n- no"
    doc_to_target: "{{['yes', 'no'][label]}}"
lm_eval/tasks/benchmarks/flan/yaml_templates/cot_template_yaml    deleted 100644 → 0    (view file @ b13753cd)

group: flan-cot
output_type: generate_until
validation_split: validation
doc_to_target: "{{answer}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "\n\n"
  do_sample: false
  temperature: 0.0
filter_list:
  - name: "get-answer"
    filter:
      - function: "regex"
        regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
      - function: "take_first"
metadata:
  version: 1.0
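As a side note (not part of the diff), the "get-answer" filter above runs the regex over each model completion and then keeps only the first match. A minimal sketch of that extraction, assuming a toy completion string:

# Illustrative sketch only: what the regex + take_first filter pair extracts.
import re

completion = "First, 12 + 30 = 42. The answer is 42."
pattern = r"The answer is (\-?[0-9\.\,]+)"  # same pattern as regex_pattern above

matches = re.findall(pattern, completion)
answer = matches[0] if matches else None  # take_first keeps the first match
print(answer)  # -> "42"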
lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml

@@ -5,19 +5,13 @@ task:
   - medqa_4options
   - task: mmlu_anatomy
     task_alias: "anatomy (mmlu)"
-    group_alias: null
   - task: mmlu_clinical_knowledge
     task_alias: "clinical_knowledge (mmlu)"
-    group_alias: null
   - task: mmlu_college_medicine
     task_alias: "college_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_medical_genetics
     task_alias: "medical_genetics (mmlu)"
-    group_alias: null
   - task: mmlu_professional_medicine
     task_alias: "professional_medicine (mmlu)"
-    group_alias: null
   - task: mmlu_college_biology
     task_alias: "college_biology (mmlu)"
-    group_alias: null
lm_eval/tasks/bigbench/generate_tasks.py

@@ -181,7 +181,7 @@ def main() -> None:
         for task in all_subtasks:
             file_name = f"{task}.yaml"
             try:
-                with open(f"{path}/{file_name}", "w") as f:
+                with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
                     f.write("# Generated by utils.py\n")
                     yaml.dump(
                         {
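The Python changes in this commit are all the same one-line fix: the config-generator and scoring scripts now pass an explicit encoding="utf-8" to open(), so file I/O no longer depends on the platform's default locale encoding. A minimal before/after sketch (hypothetical file name, not from the repo):

# Illustrative sketch only; "example_task.yaml" is a hypothetical file name.
path = "example_task.yaml"

# Before: falls back to the platform default encoding (e.g. cp1252 on Windows).
# with open(path, "w") as f:
#     f.write("# Generated by utils.py\n")

# After: explicit UTF-8, the pattern applied throughout this commit.
with open(path, "w", encoding="utf-8") as f:
    f.write("# Generated by utils.py\n")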
lm_eval/tasks/blimp/generate_configs.py

@@ -75,7 +75,7 @@ def main() -> None:
     for task in all_subtasks:
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/ceval/_generate_configs.py

@@ -79,13 +79,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):

@@ -107,7 +107,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/cmmlu/_generate_configs.py

@@ -94,13 +94,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     for subject_eng, subject_zh in tqdm(SUBJECTS.items()):

@@ -122,7 +122,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject_eng}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject_eng} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/code_x_glue/code-text/bleu.py

@@ -184,7 +184,7 @@ def splitPuncts(line):
 def computeMaps(predictions, goldfile):
     predictionMap: Dict[str, list] = {}
     goldMap: Dict[str, list] = {}
-    gf = open(goldfile, "r")
+    gf = open(goldfile, "r", encoding="utf-8")

     for row in predictions:
         cols = row.strip().split("\t")
lm_eval/tasks/csatqa/_generate_configs.py

@@ -25,7 +25,7 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our other YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     for name in tqdm(SUBSETS):

@@ -39,7 +39,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{name.lower()}.yaml"
         eval_logger.info(f"Saving yaml for subset {name} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,
lm_eval/tasks/gsm8k/gsm8k-cot.yaml

@@ -41,3 +41,4 @@ filter_list:
       - function: "take_first"
 metadata:
   version: 2.0
+  num_fewshot: 8
lm_eval/tasks/gsm8k/gsm8k.yaml

@@ -24,6 +24,7 @@ generation_kwargs:
     - "\n\n"
     - "Question:"
   do_sample: false
   temperature: 0.0
 repeats: 1
+num_fewshot: 5
 filter_list:
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml

@@ -22,3 +22,4 @@ metric_list:
 num_fewshot: 0
 metadata:
   version: 1.0
+  num_fewshot: 4
lm_eval/tasks/mmlu/_generate_configs.py

@@ -85,13 +85,13 @@ if __name__ == "__main__":
     # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
     base_yaml_name = os.path.split(args.base_yaml_path)[-1]
-    with open(args.base_yaml_path) as f:
+    with open(args.base_yaml_path, encoding="utf-8") as f:
         base_yaml = yaml.full_load(f)

     if args.cot_prompt_path is not None:
         import json

-        with open(args.cot_prompt_path) as f:
+        with open(args.cot_prompt_path, encoding="utf-8") as f:
             cot_file = json.load(f)

     ALL_CATEGORIES = []

@@ -120,7 +120,7 @@ if __name__ == "__main__":
         file_save_path = args.save_prefix_path + f"_{subject}.yaml"
         eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(
                 yaml_dict,
                 yaml_file,

@@ -142,7 +142,7 @@ if __name__ == "__main__":
     file_save_path = args.save_prefix_path + ".yaml"
     eval_logger.info(f"Saving benchmark config to {file_save_path}")
-    with open(file_save_path, "w") as yaml_file:
+    with open(file_save_path, "w", encoding="utf-8") as yaml_file:
         yaml.dump(
             {
                 "group": f"mmlu_{args.task_prefix}"
lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py

@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/model_written_evals/persona/_generate_configs.py

@@ -9,7 +9,7 @@ def main() -> None:
     for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
         file_name = f"{task}.yaml"
         try:
-            with open(f"{file_name}", "w") as f:
+            with open(f"{file_name}", "w", encoding="utf-8") as f:
                 f.write("# Generated by _generate_configs.py\n")
                 yaml.dump(
                     {
lm_eval/tasks/qasper/bool.yaml

 group: qasper
 task: qasper_bool
-dataset_path: qasper
+dataset_path: allenai/qasper
 output_type: multiple_choice
 training_split: train
 validation_split: validation
lm_eval/tasks/qasper/freeform.yaml

 group: qasper
 task: qasper_freeform
-dataset_path: qasper
+dataset_path: allenai/qasper
 output_type: generate_until
 training_split: train
 validation_split: validation