Unverified Commit da211969 authored by Jess's avatar Jess Committed by GitHub
Browse files

Merge branch 'EleutherAI:main' into main

parents 1b97e487 801322e0
task: bertaqa_en_mt_latxa-70b-v1.1
include: _bertaqa_template
dataset_name: en_mt_latxa-70b-v1.1
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_latxa-70b-v1
include: _bertaqa_template
dataset_name: en_mt_latxa-70b-v1
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_latxa-7b-v1.1
include: _bertaqa_template
dataset_name: en_mt_latxa-7b-v1.1
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_latxa-7b-v1
include: _bertaqa_template
dataset_name: en_mt_latxa-7b-v1
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_llama-2-13b
include: _bertaqa_template
dataset_name: en_mt_llama-2-13b
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_llama-2-70b
include: _bertaqa_template
dataset_name: en_mt_llama-2-70b
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_llama-2-7b
include: _bertaqa_template
dataset_name: en_mt_llama-2-7b
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_madlad
include: _bertaqa_template
dataset_name: en_mt_madlad
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_en_mt_nllb
include: _bertaqa_template
dataset_name: en_mt_nllb
doc_to_text: "Question: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nAnswer:"
task: bertaqa_eu
include: _bertaqa_template
dataset_name: eu
doc_to_text: "Galdera: {{question}}\nA: {{candidates[0]}}\nB: {{candidates[1]}}\nC: {{candidates[2]}}\nErantzuna:"
import os
import datasets
import yaml
......@@ -173,6 +174,11 @@ all_subtasks = [
"word_unscrambling",
]
# Subtasks that main() passes over while processing the "multiple_choice"
# path: when a task name is listed here, the loop iteration is abandoned
# before the dataset is loaded, so no config file is written for it there.
skip_tasks = [
"simple_arithmetic_json_multiple_choice",
"simple_arithmetic_multiple_targets_json",
]
def main() -> None:
for path, task_type in zip(
......@@ -183,11 +189,29 @@ def main() -> None:
for task in all_subtasks:
file_name = f"{task}.yaml"
try:
template_file = task_type
if path == "multiple_choice":
print(f"Checking {task} for multiple choices")
if task in skip_tasks:
continue
data = datasets.load_dataset("hails/bigbench", task + "_zero_shot")
multiple_choice_targets = data["default"][0][
"multiple_choice_targets"
]
if len(multiple_choice_targets) == 0:
continue
else:
template_file = "multiple_choice_template_b_yaml"
if set(data["default"][0]["targets"]) < set(
multiple_choice_targets
):
template_file = "multiple_choice_template_a_yaml"
with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": f"../{task_type}",
"include": f"../{template_file}",
"task": "bigbench_"
+ task
+ "_{}".format(task_type.split("_template_yaml")[0]),
......
# Generated by utils.py
dataset_name: abstract_narrative_understanding_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_abstract_narrative_understanding_multiple_choice
# Generated by utils.py
dataset_name: anachronisms_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_anachronisms_multiple_choice
# Generated by utils.py
dataset_name: analogical_similarity_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analogical_similarity_multiple_choice
# Generated by utils.py
dataset_name: analytic_entailment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analytic_entailment_multiple_choice
# Generated by utils.py
dataset_name: arithmetic_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_arithmetic_multiple_choice
# Generated by utils.py
dataset_name: ascii_word_recognition_zero_shot
include: ../multiple_choice_template_yaml
task: bigbench_ascii_word_recognition_multiple_choice
# Generated by utils.py
dataset_name: authorship_verification_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_authorship_verification_multiple_choice
# Generated by utils.py
dataset_name: auto_categorization_zero_shot
include: ../multiple_choice_template_yaml
task: bigbench_auto_categorization_multiple_choice
# Generated by utils.py
dataset_name: auto_debugging_zero_shot
include: ../multiple_choice_template_yaml
task: bigbench_auto_debugging_multiple_choice
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment