Commit 5d3bf2e7 authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into openai_completions
parents f66730c4 bf26d979
"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_date_understanding"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_date_understanding"
"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_disambiguation_qa"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_disambiguation_qa"
"dataset_name": "dyck_languages"
"description": "Correctly close a Dyck-n word.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_dyck_languages"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_dyck_languages"
"dataset_name": "formal_fallacies"
"description": "Distinguish deductively valid arguments from formal fallacies.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_formal_fallacies"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_formal_fallacies"
"dataset_name": "geometric_shapes"
"description": "Name geometric shapes from their SVG paths.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_geometric_shapes"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_geometric_shapes"
"dataset_name": "hyperbaton"
"description": "Order adjectives correctly in English sentences.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_hyperbaton"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_hyperbaton"
"dataset_name": "logical_deduction_five_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_logical_deduction_five_objects"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_five_objects"
"dataset_name": "logical_deduction_seven_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_logical_deduction_seven_objects"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_seven_objects"
"dataset_name": "logical_deduction_three_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_logical_deduction_three_objects"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_three_objects"
"dataset_name": "movie_recommendation"
"description": "Recommend movies similar to the given list of movies.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_movie_recommendation"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_movie_recommendation"
"dataset_name": "multistep_arithmetic_two"
"description": "Solve multi-step arithmetic problems.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_multistep_arithmetic_two"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_multistep_arithmetic_two"
"dataset_name": "navigate"
"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_navigate"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_navigate"
"dataset_name": "object_counting"
"description": "Questions that involve enumerating objects and asking the model to count them.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_object_counting"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_object_counting"
"dataset_name": "penguins_in_a_table"
"description": "Answer questions about a table of penguins and their attributes.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_penguins_in_a_table"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_penguins_in_a_table"
"dataset_name": "reasoning_about_colored_objects"
"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_reasoning_about_colored_objects"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_reasoning_about_colored_objects"
"dataset_name": "ruin_names"
"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_ruin_names"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_ruin_names"
"dataset_name": "salient_translation_error_detection"
"description": "Detect the type of error in an English translation of a German source sentence.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_salient_translation_error_detection"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_salient_translation_error_detection"
"dataset_name": "snarks"
"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_snarks"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_snarks"
"dataset_name": "sports_understanding"
"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_sports_understanding"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_sports_understanding"
"dataset_name": "temporal_sequences"
"description": "Task description: Answer questions about which times certain events could have occurred.\n\n"
"doc_to_text": "Q: {{input}}\nA:"
"include": "_flan_zeroshot_template_yaml"
"task": "bbh_flan_zeroshot_temporal_sequences"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_temporal_sequences"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment