merge conflict

470059f6 · lintangsutawika · b8d7d6c3 · 9d030712 · 470059f6 · 470059f6
Commit 470059f6 authored Nov 24, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/belebele/belebele_urd_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_urd_Latn.yaml
+# Belebele task bound to the `urd_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: urd_Latn
+include: _default_template_yaml
+task: belebele_urd_Latn
--- a/lm_eval/tasks/belebele/belebele_uzn_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_uzn_Latn.yaml
+# Belebele task bound to the `uzn_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: uzn_Latn
+include: _default_template_yaml
+task: belebele_uzn_Latn
--- a/lm_eval/tasks/belebele/belebele_vie_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_vie_Latn.yaml
+# Belebele task bound to the `vie_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: vie_Latn
+include: _default_template_yaml
+task: belebele_vie_Latn
--- a/lm_eval/tasks/belebele/belebele_war_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_war_Latn.yaml
+# Belebele task bound to the `war_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: war_Latn
+include: _default_template_yaml
+task: belebele_war_Latn
--- a/lm_eval/tasks/belebele/belebele_wol_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_wol_Latn.yaml
+# Belebele task bound to the `wol_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: wol_Latn
+include: _default_template_yaml
+task: belebele_wol_Latn
--- a/lm_eval/tasks/belebele/belebele_xho_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_xho_Latn.yaml
+# Belebele task bound to the `xho_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: xho_Latn
+include: _default_template_yaml
+task: belebele_xho_Latn
--- a/lm_eval/tasks/belebele/belebele_yor_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_yor_Latn.yaml
+# Belebele task bound to the `yor_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: yor_Latn
+include: _default_template_yaml
+task: belebele_yor_Latn
--- a/lm_eval/tasks/belebele/belebele_zho_Hans.yaml
+++ b/lm_eval/tasks/belebele/belebele_zho_Hans.yaml
+# Belebele task bound to the `zho_Hans` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: zho_Hans
+include: _default_template_yaml
+task: belebele_zho_Hans
--- a/lm_eval/tasks/belebele/belebele_zho_Hant.yaml
+++ b/lm_eval/tasks/belebele/belebele_zho_Hant.yaml
+# Belebele task bound to the `zho_Hant` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: zho_Hant
+include: _default_template_yaml
+task: belebele_zho_Hant
--- a/lm_eval/tasks/belebele/belebele_zsm_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_zsm_Latn.yaml
+# Belebele task bound to the `zsm_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: zsm_Latn
+include: _default_template_yaml
+task: belebele_zsm_Latn
--- a/lm_eval/tasks/belebele/belebele_zul_Latn.yaml
+++ b/lm_eval/tasks/belebele/belebele_zul_Latn.yaml
+# Belebele task bound to the `zul_Latn` dataset config. Settings not listed
+# here are expected to come from the included `_default_template_yaml`.
+dataset_name: zul_Latn
+include: _default_template_yaml
+task: belebele_zul_Latn
--- a/lm_eval/tasks/benchmarks/flan/flan_anli.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_anli.yaml
+# Group `flan_anli`: one entry per ANLI round. The three entries share the
+# same template, dataset path and prompt file; only the task name and the
+# validation split differ.
+# The `:*` suffix on `use_prompt` presumably selects every template in the
+# referenced prompt file - confirm against the prompt loader. The value is
+# quoted so the `:` and `*` are never read as YAML syntax.
+group: flan_anli
+task:
+  # Round 1
+  - include: yaml_templates/held_in_template_yaml
+    task: anli_r1
+    dataset_path: anli
+    use_prompt: 'prompt_templates/anli.yaml:*'
+    validation_split: dev_r1
+  # Round 2
+  - include: yaml_templates/held_in_template_yaml
+    task: anli_r2
+    dataset_path: anli
+    use_prompt: 'prompt_templates/anli.yaml:*'
+    validation_split: dev_r2
+  # Round 3
+  - include: yaml_templates/held_in_template_yaml
+    task: anli_r3
+    dataset_path: anli
+    use_prompt: 'prompt_templates/anli.yaml:*'
+    validation_split: dev_r3
--- a/lm_eval/tasks/benchmarks/flan/flan_arc.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_arc.yaml
+# Group `flan_arc`: the two `ai2_arc` dataset configs, both evaluated on the
+# `validation` split with the prompts from `prompt_templates/arc.yaml`.
+# `use_prompt` is quoted so the `:` and `*` are never read as YAML syntax.
+group: flan_arc
+task:
+  # ARC-Easy
+  - include: yaml_templates/held_in_template_yaml
+    task: arc_easy
+    dataset_path: ai2_arc
+    dataset_name: ARC-Easy
+    use_prompt: 'prompt_templates/arc.yaml:*'
+    validation_split: validation
+  # ARC-Challenge
+  - include: yaml_templates/held_in_template_yaml
+    task: arc_challenge
+    dataset_path: ai2_arc
+    dataset_name: ARC-Challenge
+    use_prompt: 'prompt_templates/arc.yaml:*'
+    validation_split: validation
--- a/lm_eval/tasks/benchmarks/flan/flan_boolq.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_boolq.yaml
+# Group `flan_boolq`: a single entry over the `boolq` config of `super_glue`,
+# evaluated on the `validation` split.
+# NOTE(review): unlike the ANLI/ARC groups this entry sets no explicit `task`
+# name - confirm the loader derives one.
+group: flan_boolq
+task:
+  - include: yaml_templates/held_in_template_yaml
+    dataset_path: super_glue
+    dataset_name: boolq
+    # Quoted so the `:` and `*` are never read as YAML syntax.
+    use_prompt: 'prompt_templates/boolq.yaml:*'
+    validation_split: validation
--- a/lm_eval/tasks/benchmarks/flan/flan_cot.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_cot.yaml
+# Group `flan_cot`: chain-of-thought style entries built on
+# `yaml_templates/cot_template_yaml`, with prompts taken from promptsource.
+# `use_prompt` is quoted so the `:` and `*` are never read as YAML syntax.
+group: flan_cot
+task:
+  # GSM8K. The dataset id is `gsm8k` (the previous `gsmk` does not exist) and
+  # its config is `main`; the previous `boolq` is a SuperGLUE config that was
+  # carried over from the BoolQ group by mistake.
+  # NOTE(review): GSM8K appears to publish only train/test splits - confirm
+  # that `validation_split: validation` is what the CoT template expects.
+  - include: yaml_templates/cot_template_yaml
+    dataset_path: gsm8k
+    dataset_name: main
+    use_prompt: 'promptsource:*'
+    validation_split: validation
+  # ASDiv
+  - include: yaml_templates/cot_template_yaml
+    dataset_path: EleutherAI/asdiv
+    use_prompt: 'promptsource:*'
+    validation_split: validation
--- a/lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+# Umbrella group `flan_held_in`. Each member is referenced by name only; the
+# names match the `group` values declared by the per-dataset Flan configs.
+group: flan_held_in
+task:
+  # SuperGLUE-based groups
+  - flan_boolq
+  - flan_rte
+  # ANLI rounds and ARC
+  - flan_anli
+  - flan_arc
--- a/lm_eval/tasks/benchmarks/flan/flan_held_in_yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_held_in_yaml
+# Inline definition of the `flan_held_in` group: every member task is spelled
+# out here, with include and prompt paths written relative to the parent of
+# the `flan/` directory.
+# NOTE(review): this declares the same group name as flan_held_in.yaml, and
+# the file name has no `.yaml` extension - confirm whether it is still
+# loaded or is a leftover.
+# `use_prompt` values are quoted so the `:` and `*` are never read as YAML
+# syntax.
+group: flan_held_in
+task:
+  # SuperGLUE: BoolQ
+  - include: flan/yaml_templates/held_in_template_yaml
+    dataset_path: super_glue
+    dataset_name: boolq
+    use_prompt: 'flan/prompt_templates/boolq.yaml:*'
+    validation_split: validation
+  # SuperGLUE: RTE
+  - include: flan/yaml_templates/held_in_template_yaml
+    dataset_path: super_glue
+    dataset_name: rte
+    use_prompt: 'flan/prompt_templates/rte.yaml:*'
+    validation_split: validation
+  # ANLI round 1
+  - include: flan/yaml_templates/held_in_template_yaml
+    task: anli_r1
+    dataset_path: anli
+    use_prompt: 'flan/prompt_templates/anli.yaml:*'
+    validation_split: dev_r1
+  # ANLI round 2
+  - include: flan/yaml_templates/held_in_template_yaml
+    task: anli_r2
+    dataset_path: anli
+    use_prompt: 'flan/prompt_templates/anli.yaml:*'
+    validation_split: dev_r2
+  # ANLI round 3
+  - include: flan/yaml_templates/held_in_template_yaml
+    task: anli_r3
+    dataset_path: anli
+    use_prompt: 'flan/prompt_templates/anli.yaml:*'
+    validation_split: dev_r3
+  # ARC-Easy
+  - include: flan/yaml_templates/held_in_template_yaml
+    task: arc_easy
+    dataset_path: ai2_arc
+    dataset_name: ARC-Easy
+    use_prompt: 'flan/prompt_templates/arc.yaml:*'
+    validation_split: validation
+  # ARC-Challenge
+  - include: flan/yaml_templates/held_in_template_yaml
+    task: arc_challenge
+    dataset_path: ai2_arc
+    dataset_name: ARC-Challenge
+    use_prompt: 'flan/prompt_templates/arc.yaml:*'
+    validation_split: validation
--- a/lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
+# Umbrella group `flan_held_out`. Members are referenced by name only and
+# must be defined elsewhere.
+group: flan_held_out
+task:
+  # BBH variants: zero-shot / few-shot, with and without chain-of-thought
+  - bbh_flan_zeroshot
+  - bbh_flan_fewshot
+  - bbh_flan_cot_fewshot
+  - bbh_flan_cot_zeroshot
+  # MMLU: the base task plus its Flan-style variants
+  - mmlu
+  - mmlu_flan_n_shot_generative
+  - mmlu_flan_n_shot_loglikelihood
+  - mmlu_flan_cot_zeroshot
+  - mmlu_flan_cot_fewshot
--- a/lm_eval/tasks/benchmarks/flan/flan_rte.yaml
+++ b/lm_eval/tasks/benchmarks/flan/flan_rte.yaml
+# Group `flan_rte`: a single entry over the `rte` config of `super_glue`,
+# evaluated on the `validation` split.
+# NOTE(review): unlike the ANLI/ARC groups this entry sets no explicit `task`
+# name - confirm the loader derives one.
+group: flan_rte
+task:
+  - include: yaml_templates/held_in_template_yaml
+    dataset_path: super_glue
+    dataset_name: rte
+    # Quoted so the `:` and `*` are never read as YAML syntax.
+    use_prompt: 'prompt_templates/rte.yaml:*'
+    validation_split: validation
--- a/lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml
+++ b/lm_eval/tasks/benchmarks/flan/prompt_templates/anli.yaml
+# Flan prompt templates for ANLI.
+# Each template renders the `premise` and `hypothesis` fields into a prompt
+# (`doc_to_text`) and turns `label` into the answer string (`doc_to_target`)
+# by indexing the list: 0 -> "Yes", 1 -> "It's impossible to say", 2 -> "No".
+# `doc_to_text` stays double-quoted because it relies on `\n` escapes;
+# `doc_to_target` is single-quoted, so `''` stands for one apostrophe.
+prompts:
+  template-0:
+    doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-1:
+    doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-2:
+    doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-3:
+    doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-4:
+    doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-5:
+    doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-6:
+    doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-7:
+    doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'
+  template-8:
+    doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+    doc_to_target: '{{["Yes", "It''s impossible to say", "No"][label]}}'