Commit 173b2bc3 authored by Baber's avatar Baber
Browse files

Merge branch 'main' into humaneval

# Conflicts:
#	lm_eval/api/task.py
parents 74344829 bb098f13
"fewshot_split": "tha_Thai"
"include": "_default_template_yaml"
"task": "belebele_tha_Thai"
"test_split": "tha_Thai"
dataset_name: tha_Thai
fewshot_split: test
include: _default_template_yaml
task: belebele_tha_Thai
test_split: test
"fewshot_split": "tir_Ethi"
"include": "_default_template_yaml"
"task": "belebele_tir_Ethi"
"test_split": "tir_Ethi"
dataset_name: tir_Ethi
fewshot_split: test
include: _default_template_yaml
task: belebele_tir_Ethi
test_split: test
"fewshot_split": "tsn_Latn"
"include": "_default_template_yaml"
"task": "belebele_tsn_Latn"
"test_split": "tsn_Latn"
dataset_name: tsn_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_tsn_Latn
test_split: test
"fewshot_split": "tso_Latn"
"include": "_default_template_yaml"
"task": "belebele_tso_Latn"
"test_split": "tso_Latn"
dataset_name: tso_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_tso_Latn
test_split: test
"fewshot_split": "tur_Latn"
"include": "_default_template_yaml"
"task": "belebele_tur_Latn"
"test_split": "tur_Latn"
dataset_name: tur_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_tur_Latn
test_split: test
"fewshot_split": "ukr_Cyrl"
"include": "_default_template_yaml"
"task": "belebele_ukr_Cyrl"
"test_split": "ukr_Cyrl"
dataset_name: ukr_Cyrl
fewshot_split: test
include: _default_template_yaml
task: belebele_ukr_Cyrl
test_split: test
"fewshot_split": "urd_Arab"
"include": "_default_template_yaml"
"task": "belebele_urd_Arab"
"test_split": "urd_Arab"
dataset_name: urd_Arab
fewshot_split: test
include: _default_template_yaml
task: belebele_urd_Arab
test_split: test
"fewshot_split": "urd_Latn"
"include": "_default_template_yaml"
"task": "belebele_urd_Latn"
"test_split": "urd_Latn"
dataset_name: urd_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_urd_Latn
test_split: test
"fewshot_split": "uzn_Latn"
"include": "_default_template_yaml"
"task": "belebele_uzn_Latn"
"test_split": "uzn_Latn"
dataset_name: uzn_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_uzn_Latn
test_split: test
"fewshot_split": "vie_Latn"
"include": "_default_template_yaml"
"task": "belebele_vie_Latn"
"test_split": "vie_Latn"
dataset_name: vie_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_vie_Latn
test_split: test
"fewshot_split": "war_Latn"
"include": "_default_template_yaml"
"task": "belebele_war_Latn"
"test_split": "war_Latn"
dataset_name: war_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_war_Latn
test_split: test
"fewshot_split": "wol_Latn"
"include": "_default_template_yaml"
"task": "belebele_wol_Latn"
"test_split": "wol_Latn"
dataset_name: wol_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_wol_Latn
test_split: test
"fewshot_split": "xho_Latn"
"include": "_default_template_yaml"
"task": "belebele_xho_Latn"
"test_split": "xho_Latn"
dataset_name: xho_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_xho_Latn
test_split: test
"fewshot_split": "yor_Latn"
"include": "_default_template_yaml"
"task": "belebele_yor_Latn"
"test_split": "yor_Latn"
dataset_name: yor_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_yor_Latn
test_split: test
"fewshot_split": "zho_Hans"
"include": "_default_template_yaml"
"task": "belebele_zho_Hans"
"test_split": "zho_Hans"
dataset_name: zho_Hans
fewshot_split: test
include: _default_template_yaml
task: belebele_zho_Hans
test_split: test
"fewshot_split": "zho_Hant"
"include": "_default_template_yaml"
"task": "belebele_zho_Hant"
"test_split": "zho_Hant"
dataset_name: zho_Hant
fewshot_split: test
include: _default_template_yaml
task: belebele_zho_Hant
test_split: test
"fewshot_split": "zsm_Latn"
"include": "_default_template_yaml"
"task": "belebele_zsm_Latn"
"test_split": "zsm_Latn"
dataset_name: zsm_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_zsm_Latn
test_split: test
"fewshot_split": "zul_Latn"
"include": "_default_template_yaml"
"task": "belebele_zul_Latn"
"test_split": "zul_Latn"
dataset_name: zul_Latn
fewshot_split: test
include: _default_template_yaml
task: belebele_zul_Latn
test_split: test
......@@ -4,48 +4,51 @@ task:
# ANLI R1
- group: anli_r1_flan
group_alias: ANLI R1
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: anli_r1
- task: anli_r1_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-7
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r1
- task: anli_r1_prompt-8
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
......@@ -53,48 +56,51 @@ task:
# ANLI R2
- group: anli_r2_flan
group_alias: ANLI R2
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: anli_r2
- task: anli_r2_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-7
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r2
- task: anli_r2_prompt-8
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
......@@ -102,48 +108,51 @@ task:
# ANLI R3
- group: anli_r3_flan
group_alias: ANLI R3
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: anli_r3
- task: anli_r3_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nChoose your answer: based on the paragraph above can we conclude that \"{{hypothesis}}\"?\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nI think the answer is"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nBased on that paragraph can we conclude that this sentence is true?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{premise}}\n\nCan we draw the following conclusion?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nDoes this next sentence follow, given the preceding text?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{premise}}\nCan we infer the following?\n{{hypothesis}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nThe answer is:"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true:\n\n{{premise}}\n\nOPTIONS:\n- Yes\n- It's impossible to say\n- No\nHypothesis: {{hypothesis}}\n\n\n"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true (see options at the end):\n\n{{premise}}\n\nSentence: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-7
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Can we draw the following hypothesis from the context (see options)? \n\nContext:\n\n{{premise}}\n\nHypothesis: {{hypothesis}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
doc_to_target: "{{[\"Yes\", \"It's impossible to say\", \"No\"][label]}}"
- task: anli_r3
- task: anli_r3_prompt-8
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Choose from options: Determine if the sentence is true based on the text below:\n{{hypothesis}}\n\n{{premise}}\nOPTIONS:\n- Yes\n- It's impossible to say\n- No"
......@@ -151,38 +160,41 @@ task:
# Arc Easy
- group: arc_easy_flan
group_alias: Arc Easy
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: arc_easy
- task: arc_easy_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_easy
- task: arc_easy_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
......@@ -190,38 +202,41 @@ task:
# Arc Challenge
- group: arc_challenge_flan
group_alias: Arc Challenge
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: arc_challenge
- task: arc_challenge_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}\nAnswer:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "Question: {{question}}\n\nWhat is the correct answer to the question from the following choices?\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Q: {{question}}\nWhat is the correct answer to this question?\nOPTIONS:\n- {{choices.text|join('\n- ')}}...A:"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "Choose your answer?\n\n{{question}}\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Answer the question\n\n{{question}}\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
doc_to_target: "{{choices.text[choices.label.index(answerKey)]}}"
- task: arc_challenge
- task: arc_challenge_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{question}}\n\nPick the answer from these options\n\nOPTIONS:\n- {{choices.text|join('\n- ')}}"
......@@ -229,53 +244,56 @@ task:
# BoolQ
- group: boolq_flan
group_alias: BoolQ
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: boolq
- task: boolq_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nCan we conclude that {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs it true that {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\n{{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "Text: {{passage}}\n\nQuestion: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nWhat's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "{{passage}}\nBased on the above text what's the best answer to this question: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "{{passage}}\nAnswer this question making sure that the answer is supposed by the text: {{question}}?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-7
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs the following statement correct based on the text\n\n{{question}}\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-8
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "{{passage}}\n\nIs this statement correct \"{{question}}\"?\n\nOPTIONS:\n- no\n- yes"
doc_to_target: "{{['no', 'yes'][label]}}"
- task: boolq
- task: boolq_prompt-9
task_alias: prompt-9
include: _held_in_template_yaml
doc_to_text: "Is it true that {{question}} based on the following text?\n\n{{passage}}\n\nOPTIONS:\n- no\n- yes"
......@@ -283,48 +301,51 @@ task:
# RTE
- group: rte_flan
group_alias: RTE
aggregate_metric_list:
- metric: acc
weight_by_size: True
task:
- task: rte
- task: rte_prompt-0
task_alias: prompt-0
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nQuestion with options: Based on the paragraph above can we conclude that \"{{sentence2}}\"?\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-1
task_alias: prompt-1
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nBased on that paragraph can we conclude that the sentence below is true?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
  - task: rte_prompt-2
task_alias: prompt-2
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\n\nQ with options: Can we draw the following conclusion?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-3
task_alias: prompt-3
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\nDoes this next sentence follow, given the preceding text?\n{{sentence2}}\n\nOPTIONS:\n- yes\n- no"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-4
task_alias: prompt-4
include: _held_in_template_yaml
doc_to_text: "{{sentence1}}\nOPTIONS:\n- yes\n- no\nQuestion: Can we infer the following?\n{{sentence2}}"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-5
task_alias: prompt-5
include: _held_in_template_yaml
doc_to_text: "Read the following paragraph and determine if the hypothesis is true. Select from options at the end:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nThe answer is"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-6
task_alias: prompt-6
include: _held_in_template_yaml
doc_to_text: "Read the text and determine if the sentence is true:\n\n{{sentence1}}\n\nSentence: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-7
task_alias: prompt-7
include: _held_in_template_yaml
doc_to_text: "Question with options: can we draw the following hypothesis from the context? \n\nContext:\n\n{{sentence1}}\n\nHypothesis: {{sentence2}}\nOPTIONS:\n- yes\n- no\nA:"
doc_to_target: "{{['yes', 'no'][label]}}"
- task: rte
- task: rte_prompt-8
task_alias: prompt-8
include: _held_in_template_yaml
doc_to_text: "Determine if the sentence is true based on the text below. Choose from options.\n{{sentence2}}\n\n{{sentence1}}\nOPTIONS:\n- yes\n- no"
......
......@@ -7,3 +7,9 @@ task:
- minerva_math_num_theory
- minerva_math_prealgebra
- minerva_math_precalc
aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: true
metadata:
version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment