Commit f2c396ab authored by lintangsutawika
Browse files

pre-commit reformat

parent 20a54b3a
...@@ -6,7 +6,7 @@ Title: `Beyond the Imitation Game: Quantifying and extrapolating the capabilitie ...@@ -6,7 +6,7 @@ Title: `Beyond the Imitation Game: Quantifying and extrapolating the capabilitie
Abstract: https://arxiv.org/abs/2206.04615 Abstract: https://arxiv.org/abs/2206.04615
The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to probe large language models and extrapolate their future capabilities. The Beyond the Imitation Game Benchmark (BIG-bench) is a collaborative benchmark intended to probe large language models and extrapolate their future capabilities.
Homepage: https://github.com/google/BIG-bench Homepage: https://github.com/google/BIG-bench
......
...@@ -2,179 +2,182 @@ import os ...@@ -2,179 +2,182 @@ import os
import yaml import yaml
all_subtasks = [ all_subtasks = [
'abstract_narrative_understanding', "abstract_narrative_understanding",
'anachronisms', "anachronisms",
'analogical_similarity', "analogical_similarity",
'analytic_entailment', "analytic_entailment",
'arithmetic', "arithmetic",
'ascii_word_recognition', "ascii_word_recognition",
'authorship_verification', "authorship_verification",
'auto_categorization', "auto_categorization",
'auto_debugging', "auto_debugging",
'bbq_lite_json', "bbq_lite_json",
'bridging_anaphora_resolution_barqa', "bridging_anaphora_resolution_barqa",
'causal_judgment', "causal_judgment",
'cause_and_effect', "cause_and_effect",
'checkmate_in_one', "checkmate_in_one",
'chess_state_tracking', "chess_state_tracking",
'chinese_remainder_theorem', "chinese_remainder_theorem",
'cifar10_classification', "cifar10_classification",
'code_line_description', "code_line_description",
'codenames', "codenames",
'color', "color",
'common_morpheme', "common_morpheme",
'conceptual_combinations', "conceptual_combinations",
'conlang_translation', "conlang_translation",
'contextual_parametric_knowledge_conflicts', "contextual_parametric_knowledge_conflicts",
'crash_blossom', "crash_blossom",
'crass_ai', "crass_ai",
'cryobiology_spanish', "cryobiology_spanish",
'cryptonite', "cryptonite",
'cs_algorithms', "cs_algorithms",
'dark_humor_detection', "dark_humor_detection",
'date_understanding', "date_understanding",
'disambiguation_qa', "disambiguation_qa",
'discourse_marker_prediction', "discourse_marker_prediction",
'disfl_qa', "disfl_qa",
'dyck_languages', "dyck_languages",
'elementary_math_qa', "elementary_math_qa",
'emoji_movie', "emoji_movie",
'emojis_emotion_prediction', "emojis_emotion_prediction",
'empirical_judgments', "empirical_judgments",
'english_proverbs', "english_proverbs",
'english_russian_proverbs', "english_russian_proverbs",
'entailed_polarity', "entailed_polarity",
'entailed_polarity_hindi', "entailed_polarity_hindi",
'epistemic_reasoning', "epistemic_reasoning",
'evaluating_information_essentiality', "evaluating_information_essentiality",
'fact_checker', "fact_checker",
'fantasy_reasoning', "fantasy_reasoning",
'few_shot_nlg', "few_shot_nlg",
'figure_of_speech_detection', "figure_of_speech_detection",
'formal_fallacies_syllogisms_negation', "formal_fallacies_syllogisms_negation",
'gem', "gem",
'gender_inclusive_sentences_german', "gender_inclusive_sentences_german",
'general_knowledge', "general_knowledge",
'geometric_shapes', "geometric_shapes",
'goal_step_wikihow', "goal_step_wikihow",
'gre_reading_comprehension', "gre_reading_comprehension",
'hhh_alignment', "hhh_alignment",
'hindi_question_answering', "hindi_question_answering",
'hindu_knowledge', "hindu_knowledge",
'hinglish_toxicity', "hinglish_toxicity",
'human_organs_senses', "human_organs_senses",
'hyperbaton', "hyperbaton",
'identify_math_theorems', "identify_math_theorems",
'identify_odd_metaphor', "identify_odd_metaphor",
'implicatures', "implicatures",
'implicit_relations', "implicit_relations",
'intent_recognition', "intent_recognition",
'international_phonetic_alphabet_nli', "international_phonetic_alphabet_nli",
'international_phonetic_alphabet_transliterate', "international_phonetic_alphabet_transliterate",
'intersect_geometry', "intersect_geometry",
'irony_identification', "irony_identification",
'kanji_ascii', "kanji_ascii",
'kannada', "kannada",
'key_value_maps', "key_value_maps",
'known_unknowns', "known_unknowns",
'language_games', "language_games",
'language_identification', "language_identification",
'linguistic_mappings', "linguistic_mappings",
'linguistics_puzzles', "linguistics_puzzles",
'list_functions', "list_functions",
'logic_grid_puzzle', "logic_grid_puzzle",
'logical_args', "logical_args",
'logical_deduction', "logical_deduction",
'logical_fallacy_detection', "logical_fallacy_detection",
'logical_sequence', "logical_sequence",
'mathematical_induction', "mathematical_induction",
'matrixshapes', "matrixshapes",
'metaphor_boolean', "metaphor_boolean",
'metaphor_understanding', "metaphor_understanding",
'minute_mysteries_qa', "minute_mysteries_qa",
'misconceptions', "misconceptions",
'misconceptions_russian', "misconceptions_russian",
'mnist_ascii', "mnist_ascii",
'modified_arithmetic', "modified_arithmetic",
'moral_permissibility', "moral_permissibility",
'movie_dialog_same_or_different', "movie_dialog_same_or_different",
'movie_recommendation', "movie_recommendation",
'mult_data_wrangling', "mult_data_wrangling",
'multiemo', "multiemo",
'natural_instructions', "natural_instructions",
'navigate', "navigate",
'nonsense_words_grammar', "nonsense_words_grammar",
'novel_concepts', "novel_concepts",
'object_counting', "object_counting",
'odd_one_out', "odd_one_out",
'operators', "operators",
'paragraph_segmentation', "paragraph_segmentation",
'parsinlu_qa', "parsinlu_qa",
'parsinlu_reading_comprehension', "parsinlu_reading_comprehension",
'penguins_in_a_table', "penguins_in_a_table",
'periodic_elements', "periodic_elements",
'persian_idioms', "persian_idioms",
'phrase_relatedness', "phrase_relatedness",
'physical_intuition', "physical_intuition",
'physics', "physics",
'physics_questions', "physics_questions",
'play_dialog_same_or_different', "play_dialog_same_or_different",
'polish_sequence_labeling', "polish_sequence_labeling",
'presuppositions_as_nli', "presuppositions_as_nli",
'qa_wikidata', "qa_wikidata",
'question_selection', "question_selection",
'real_or_fake_text', "real_or_fake_text",
'reasoning_about_colored_objects', "reasoning_about_colored_objects",
'repeat_copy_logic', "repeat_copy_logic",
'rephrase', "rephrase",
'riddle_sense', "riddle_sense",
'ruin_names', "ruin_names",
'salient_translation_error_detection', "salient_translation_error_detection",
'scientific_press_release', "scientific_press_release",
'semantic_parsing_in_context_sparc', "semantic_parsing_in_context_sparc",
'semantic_parsing_spider', "semantic_parsing_spider",
'sentence_ambiguity', "sentence_ambiguity",
'similarities_abstraction', "similarities_abstraction",
'simp_turing_concept', "simp_turing_concept",
'simple_arithmetic_json', "simple_arithmetic_json",
'simple_arithmetic_json_multiple_choice', "simple_arithmetic_json_multiple_choice",
'simple_arithmetic_json_subtasks', "simple_arithmetic_json_subtasks",
'simple_arithmetic_multiple_targets_json', "simple_arithmetic_multiple_targets_json",
'simple_ethical_questions', "simple_ethical_questions",
'simple_text_editing', "simple_text_editing",
'snarks', "snarks",
'social_iqa', "social_iqa",
'social_support', "social_support",
'sports_understanding', "sports_understanding",
'strange_stories', "strange_stories",
'strategyqa', "strategyqa",
'sufficient_information', "sufficient_information",
'suicide_risk', "suicide_risk",
'swahili_english_proverbs', "swahili_english_proverbs",
'swedish_to_german_proverbs', "swedish_to_german_proverbs",
'symbol_interpretation', "symbol_interpretation",
'temporal_sequences', "temporal_sequences",
'tense', "tense",
'timedial', "timedial",
'topical_chat', "topical_chat",
'tracking_shuffled_objects', "tracking_shuffled_objects",
'understanding_fables', "understanding_fables",
'undo_permutation', "undo_permutation",
'unit_conversion', "unit_conversion",
'unit_interpretation', "unit_interpretation",
'unnatural_in_context_learning', "unnatural_in_context_learning",
'vitaminc_fact_verification', "vitaminc_fact_verification",
'what_is_the_tao', "what_is_the_tao",
'which_wiki_edit', "which_wiki_edit",
'winowhy', "winowhy",
'word_sorting', "word_sorting",
'word_unscrambling' "word_unscrambling",
] ]
def main() -> None: def main() -> None:
for path, task_type in zip(["multiple_choice", "greedy_until"], ["multiple_choice_template_yaml", "greedy_until_template_yaml"]): for path, task_type in zip(
["multiple_choice", "greedy_until"],
["multiple_choice_template_yaml", "greedy_until_template_yaml"],
):
os.makedirs(path, exist_ok=True) os.makedirs(path, exist_ok=True)
for task in all_subtasks: for task in all_subtasks:
file_name = f"{task}.yaml" file_name = f"{task}.yaml"
...@@ -184,11 +187,15 @@ def main() -> None: ...@@ -184,11 +187,15 @@ def main() -> None:
yaml.dump( yaml.dump(
{ {
"include": f"../{task_type}", "include": f"../{task_type}",
"task": "bigbench_" + task + "_{}".format(task_type.split("_template_yaml")[0]), "task": "bigbench_"
"dataset_name": task + "_zero_shot", # zero-shot version of the dataset + task
+ "_{}".format(task_type.split("_template_yaml")[0]),
"dataset_name": task
+ "_zero_shot", # zero-shot version of the dataset
}, },
f, f,
width=float("inf"), allow_unicode=True width=float("inf"),
allow_unicode=True,
) )
except FileExistsError: except FileExistsError:
pass pass
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment