Commit e1ae8a2f authored by Herbie Bradley
Browse files

Merge remote-tracking branch 'origin/big-refactor' into calibration

parents 50e99bd7 30936bc7
# Task config for `bbh_flan_fewshot_web_of_lies`.
# The `include` key references the shared template `_flan_fewshot_template_yaml`.
dataset_name: web_of_lies
description: "Evaluate a random boolean function expressed as a word problem.\n\n"
# Three worked Q/A examples, then the document's `input` and an open answer slot.
# `|-` keeps the line breaks and drops the trailing newline, so the prompt ends at "A:".
doc_to_text: |-
  Q: Question: Fidel tells the truth. Jerry says Fidel tells the truth. Vina says Jerry tells the truth. Millicent says Vina lies. Raymond says Millicent lies. Does Raymond tell the truth?
  A: Yes

  Q: Question: Kristian lies. Millie says Kristian lies. Maybelle says Millie tells the truth. Fidel says Maybelle lies. Leda says Fidel lies. Does Leda tell the truth?
  A: Yes

  Q: Question: Kristian tells the truth. Michaela says Kristian lies. Raymond says Michaela tells the truth. Osvaldo says Raymond tells the truth. Jamey says Osvaldo tells the truth. Does Jamey tell the truth?
  A: No

  Q: {{input}}
  A:
include: _flan_fewshot_template_yaml
task: bbh_flan_fewshot_web_of_lies
# Task config for `bbh_flan_fewshot_word_sorting`.
# The `include` key references the shared template `_flan_fewshot_template_yaml`.
dataset_name: word_sorting
description: "Sort a list of words.\n\n"
# Three worked Q/A examples, then the document's `input` and an open answer slot.
# `|-` keeps the line breaks and drops the trailing newline, so the prompt ends at "A:".
doc_to_text: |-
  Q: Sort the following words alphabetically: List: oven costume counterpart
  A: costume counterpart oven

  Q: Sort the following words alphabetically: List: hypochlorite ponderosa phone credulity
  A: credulity hypochlorite phone ponderosa

  Q: Sort the following words alphabetically: List: newt arson parthia seismography mugho aspect census
  A: arson aspect census mugho newt parthia seismography

  Q: {{input}}
  A:
include: _flan_fewshot_template_yaml
task: bbh_flan_fewshot_word_sorting
# Shared settings for the `bbh_flan_zeroshot` task group.
# NOTE(review): presumably this is the `_flan_zeroshot_template_yaml` file that
# the per-task configs name in their `include` key — confirm against the repo.
group: bbh_flan_zeroshot
dataset_path: lukaemon/bbh
output_type: generate_until
test_split: test
# The reference answer is rendered from each document's `target` field.
doc_to_target: "{{target}}"
metric_list:
  # `aggregation` and `higher_is_better` belong to this metric entry, so they
  # must be nested inside the list item rather than sit at the top level.
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # ignore_case: true
    # ignore_punctuation: true
generation_kwargs:
  # Stop strings for generation.
  until:
    - "</s>"
  # Sampling is disabled and temperature is zero.
  do_sample: false
  temperature: 0.0
# Task config for `bbh_flan_zeroshot_boolean_expressions`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: boolean_expressions
description: "Evaluate the result of a random Boolean expression.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_boolean_expressions
# Task config for `bbh_flan_zeroshot_causal_judgement`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: causal_judgement
description: "Answer questions about causal attribution.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_causal_judgement
# Task config for `bbh_flan_zeroshot_date_understanding`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: date_understanding
description: "Infer the date from context.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_date_understanding
# Task config for `bbh_flan_zeroshot_disambiguation_qa`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: disambiguation_qa
description: "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_disambiguation_qa
# Task config for `bbh_flan_zeroshot_dyck_languages`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: dyck_languages
description: "Correctly close a Dyck-n word.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_dyck_languages
# Task config for `bbh_flan_zeroshot_formal_fallacies`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: formal_fallacies
description: "Distinguish deductively valid arguments from formal fallacies.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_formal_fallacies
# Task config for `bbh_flan_zeroshot_geometric_shapes`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: geometric_shapes
description: "Name geometric shapes from their SVG paths.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_geometric_shapes
# Task config for `bbh_flan_zeroshot_hyperbaton`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: hyperbaton
description: "Order adjectives correctly in English sentences.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_hyperbaton
# Task config for `bbh_flan_zeroshot_logical_deduction_five_objects`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: logical_deduction_five_objects
description: "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_logical_deduction_five_objects
# Task config for `bbh_flan_zeroshot_logical_deduction_seven_objects`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: logical_deduction_seven_objects
description: "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_logical_deduction_seven_objects
# Task config for `bbh_flan_zeroshot_logical_deduction_three_objects`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: logical_deduction_three_objects
description: "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_logical_deduction_three_objects
# Task config for `bbh_flan_zeroshot_movie_recommendation`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: movie_recommendation
description: "Recommend movies similar to the given list of movies.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_movie_recommendation
# Task config for `bbh_flan_zeroshot_multistep_arithmetic_two`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: multistep_arithmetic_two
description: "Solve multi-step arithmetic problems.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_multistep_arithmetic_two
# Task config for `bbh_flan_zeroshot_navigate`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: navigate
description: "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_navigate
# Task config for `bbh_flan_zeroshot_object_counting`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: object_counting
description: "Questions that involve enumerating objects and asking the model to count them.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_object_counting
# Task config for `bbh_flan_zeroshot_penguins_in_a_table`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: penguins_in_a_table
description: "Answer questions about a table of penguins and their attributes.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_penguins_in_a_table
# Task config for `bbh_flan_zeroshot_reasoning_about_colored_objects`.
# The `include` key references the shared template `_flan_zeroshot_template_yaml`.
dataset_name: reasoning_about_colored_objects
description: "Answer extremely simple questions about the colors of objects on a surface.\n\n"
# Prompt: the document's `input` after "Q: ", then an open "A:" slot (no trailing newline).
doc_to_text: |-
  Q: {{input}}
  A:
include: _flan_zeroshot_template_yaml
task: bbh_flan_zeroshot_reasoning_about_colored_objects
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment