添加Megatron项目

5add46aa · hepj · deb8370c · 5add46aa · 5add46aa · 5add46aa
Commit 5add46aa authored Jan 09, 2025 by hepj
20 changed files
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_urd_Arab.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_urd_Arab.yaml
+"fewshot_split": "urd_Arab"
+"include": "_default_template_yaml"
+"task": "belebele_urd_Arab"
+"test_split": "urd_Arab"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_urd_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_urd_Latn.yaml
+"fewshot_split": "urd_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_urd_Latn"
+"test_split": "urd_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_uzn_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_uzn_Latn.yaml
+"fewshot_split": "uzn_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_uzn_Latn"
+"test_split": "uzn_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_vie_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_vie_Latn.yaml
+"fewshot_split": "vie_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_vie_Latn"
+"test_split": "vie_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_war_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_war_Latn.yaml
+"fewshot_split": "war_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_war_Latn"
+"test_split": "war_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_wol_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_wol_Latn.yaml
+"fewshot_split": "wol_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_wol_Latn"
+"test_split": "wol_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_xho_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_xho_Latn.yaml
+"fewshot_split": "xho_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_xho_Latn"
+"test_split": "xho_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_yor_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_yor_Latn.yaml
+"fewshot_split": "yor_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_yor_Latn"
+"test_split": "yor_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zho_Hans.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zho_Hans.yaml
+"fewshot_split": "zho_Hans"
+"include": "_default_template_yaml"
+"task": "belebele_zho_Hans"
+"test_split": "zho_Hans"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zho_Hant.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zho_Hant.yaml
+"fewshot_split": "zho_Hant"
+"include": "_default_template_yaml"
+"task": "belebele_zho_Hant"
+"test_split": "zho_Hant"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zsm_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zsm_Latn.yaml
+"fewshot_split": "zsm_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_zsm_Latn"
+"test_split": "zsm_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zul_Latn.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/belebele/belebele_zul_Latn.yaml
+"fewshot_split": "zul_Latn"
+"include": "_default_template_yaml"
+"task": "belebele_zul_Latn"
+"test_split": "zul_Latn"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/_held_in_template_yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/_held_in_template_yaml
+output_type: generate_until
+test_split: null
+doc_to_choice: null
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true
+generation_kwargs:
+  until:
+    - "</s>"
+  do_sample: false
+  temperature: 0.0
+metadata:
+  version: 1.0
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/flan_held_in.yaml
+group: flan_held_in
+group_alias: Flan (Held-In)
+task:
+  # ANLI R1
+  - group: anli_r1_flan
+    group_alias: ANLI R1
+    task:
+      - task: anli_r1
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-7
+        include: _held_in_template_yaml
+        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r1
+        task_alias: prompt-8
+        include: _held_in_template_yaml
+        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+  # ANLI R2
+  - group: anli_r2_flan
+    group_alias: ANLI R2
+    task:
+      - task: anli_r2
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-7
+        include: _held_in_template_yaml
+        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r2
+        task_alias: prompt-8
+        include: _held_in_template_yaml
+        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+  # ANLI R3
+  - group: anli_r3_flan
+    group_alias: ANLI R3
+    task:
+      - task: anli_r3
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-7
+        include: _held_in_template_yaml
+        doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+      - task: anli_r3
+        task_alias: prompt-8
+        include: _held_in_template_yaml
+        doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
+        doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
+  # Arc Easy
+  - group: arc_easy_flan
+    group_alias: Arc Easy
+    task:
+      - task: arc_easy
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_easy
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+  # Arc Challenge
+  - group: arc_challenge_flan
+    group_alias: Arc Challenge
+    task:
+      - task: arc_challenge
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+      - task: arc_challenge
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
+        doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
+  # BoolQ
+  - group: boolq_flan
+    group_alias: BoolQ
+    task:
+      - task: boolq
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-7
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-8
+        include: _held_in_template_yaml
+        doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+      - task: boolq
+        task_alias: prompt-9
+        include: _held_in_template_yaml
+        doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
+        doc_to_target: "{{['no', 'yes'][label]}}"
+  # RTE
+  - group: rte_flan
+    group_alias: RTE
+    task:
+      - task: rte
+        task_alias: prompt-0
+        include: _held_in_template_yaml
+        doc_to_text: "{{sentence1}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{sentence2}}\"?\n\nOPTIONS:\n- yes\n- no"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-1
+        include: _held_in_template_yaml
+        doc_to_text: "{{sentence1}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-2
+        include: _held_in_template_yaml
+        doc_to_text: "{{sentence1}}\n\nQ with options: Can we draw the following conclusion?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-3
+        include: _held_in_template_yaml
+        doc_to_text: "{{sentence1}}\nDoes this next sentence follow, given the preceding text?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-4
+        include: _held_in_template_yaml
+        doc_to_text: "{{sentence1}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{sentence2}}"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-5
+        include: _held_in_template_yaml
+        doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nThe answer is"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-6
+        include: _held_in_template_yaml
+        doc_to_text: "Read the text and determine if the sentence is true:\n\n{{sentence1}}\n\nSentence: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-7
+        include: _held_in_template_yaml
+        doc_to_text: "Question with options: can we draw the following hypothesis from the context? \n\nContext:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
+        doc_to_target: "{{['yes', 'no'][label]}}"
+      - task: rte
+        task_alias: prompt-8
+        include: _held_in_template_yaml
+        doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{sentence2}}\n\n{{sentence1}}\nOPTIONS:\n- yes\n- no"
+        doc_to_target: "{{['yes', 'no'][label]}}"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/flan/flan_held_out.yaml
+group: flan_held_out
+task:
+  # BBH
+  - bbh_zeroshot
+  - bbh_fewshot
+  - bbh_cot_fewshot
+  - bbh_cot_zeroshot
+  # MMLU
+  - mmlu
+  - mmlu_flan_n_shot_generative
+  - mmlu_flan_n_shot_loglikelihood
+  - mmlu_flan_cot_zeroshot
+  - mmlu_flan_cot_fewshot
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/minerva_math.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/minerva_math.yaml
+group: minerva_math
+task:
+  - minerva_math_algebra
+  - minerva_math_counting_and_prob
+  - minerva_math_geometry
+  - minerva_math_intermediate_algebra
+  - minerva_math_num_theory
+  - minerva_math_prealgebra
+  - minerva_math_precalc
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/multimedqa/README.md
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/multimedqa/README.md
+# MultiMedQA (multiple-choice subset)
+
+### Paper
+
+Title: Large Language Models Encode Clinical Knowledge
+
+Abstract: https://arxiv.org/abs/2212.13138
+
+A benchmark combining four existing multiple-choice question answering datasets spanning professional medical exams and research queries.
+
+### Citation
+
+```
+@Article{Singhal2023,
+author={Singhal, Karan and Azizi, Shekoofeh and Tu, Tao and Mahdavi, S. Sara and Wei, Jason and Chung, Hyung Won and Scales, Nathan and Tanwani, Ajay and Cole-Lewis, Heather and Pfohl, Stephen and Payne, Perry and Seneviratne, Martin and Gamble, Paul and Kelly, Chris and Babiker, Abubakr and Sch{\"a}rli, Nathanael and Chowdhery, Aakanksha and Mansfield, Philip and Demner-Fushman, Dina and Ag{\"u}era y Arcas, Blaise and Webster, Dale and Corrado, Greg S. and Matias, Yossi and Chou, Katherine and Gottweis, Juraj and Tomasev, Nenad and Liu, Yun and Rajkomar, Alvin and Barral, Joelle and Semturs, Christopher and Karthikesalingam, Alan and Natarajan, Vivek},
+title={Large language models encode clinical knowledge},
+journal={Nature},
+year={2023},
+month={Aug},
+day={01},
+volume={620},
+number={7972},
+pages={172-180},
+issn={1476-4687},
+doi={10.1038/s41586-023-06291-2},
+url={https://doi.org/10.1038/s41586-023-06291-2}
+}
+```
+
+### Tasks
+
+* [PubMedQA](https://pubmedqa.github.io/) - 1,000 expert-labeled Q&A pairs where a question and a corresponding PubMed abstract as context are given and a yes/maybe/no answer must be produced. Unlike the rest of the tasks in this suite, PubMedQA is a closed-domain Q&A task.
+* [MedQA](https://github.com/jind11/MedQA) - United States Medical Licensing Examination (USMLE) questions with 4 or 5 possible answers. Typically, only the 4-option questions are used.
+* [MedMCQA](https://medmcqa.github.io/) - 4-option multiple choice questions from Indian medical entrance examinations, >191k total questions.
+* [MMLU](https://arxiv.org/abs/2009.03300) - 4-option multiple choice exam questions from a variety of domains. The following 6 domains are utilized here:
+	* Anatomy
+	* Clinical Knowledge
+	* College Medicine
+	* Medical Genetics
+	* Professional Medicine
+	* College Biology
+
+Note that MultiMedQA also includes some short-form and long-form Q&A tasks (LiveQA, MedicationQA, HealthSearchQA). Evaluation on these tasks is usually done by experts and is not typically performed automatically, so these tasks are omitted here.
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/multimedqa/multimedqa.yaml
+group: multimedqa
+task:
+  - pubmedqa
+  - medmcqa
+  - medqa_4options
+  - task: mmlu_anatomy
+    task_alias: "anatomy (mmlu)"
+  - task: mmlu_clinical_knowledge
+    task_alias: "clinical_knowledge (mmlu)"
+  - task: mmlu_college_medicine
+    task_alias: "college_medicine (mmlu)"
+  - task: mmlu_medical_genetics
+    task_alias: "medical_genetics (mmlu)"
+  - task: mmlu_professional_medicine
+    task_alias: "professional_medicine (mmlu)"
+  - task: mmlu_college_biology
+    task_alias: "college_biology (mmlu)"
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/openllm.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/openllm.yaml
+group: openllm
+group_alias: Open LLM Leaderboard
+task:
+  - task: arc_challenge
+    fewshot_split: validation
+    num_fewshot: 25
+  - task: hellaswag
+    fewshot_split: train
+    num_fewshot: 10
+  - task: truthfulqa
+    num_fewshot: 0
+  - task: mmlu
+    num_fewshot: 5
+  - task: winogrande
+    fewshot_split: train
+    num_fewshot: 5
+  - task: gsm8k
+    num_fewshot: 5
--- a/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/pythia.yaml
+++ b/LM-Evaluation-Harness-240310/lm_eval/tasks/benchmarks/pythia.yaml
+group: pythia
+task:
+  - lambada_openai
+  - logiqa
+  - piqa
+  - sciq
+  - wikitext
+  - winogrande
+  - wsc
+  - ai2_arc
+  - blimp
+  - mmlu