Commit 66421b57 authored by lintangsutawika's avatar lintangsutawika
Browse files

add prompt variation

parent 55eff889
"dataset_name": "reasoning_about_colored_objects"
"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_reasoning_about_colored_objects"
"dataset_name": "ruin_names"
"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_ruin_names"
"process_docs": !function utils.fix_ruin_names
\ No newline at end of file
"dataset_name": "salient_translation_error_detection"
"description": "Detect the type of error in an English translation of a German source sentence.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_salient_translation_error_detection"
"dataset_name": "snarks"
"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_snarks"
"dataset_name": "sports_understanding"
"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_sports_understanding"
"doc_to_target": target
"doc_to_choice": ["yes", "no"]
\ No newline at end of file
"dataset_name": "temporal_sequences"
"description": "Task description: Answer questions about which times certain events could have occurred.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_temporal_sequences"
"dataset_name": "tracking_shuffled_objects_five_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_five_objects"
"dataset_name": "tracking_shuffled_objects_seven_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_seven_objects"
"dataset_name": "tracking_shuffled_objects_three_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_three_objects"
"dataset_name": "web_of_lies"
"description": "Evaluate a random boolean function expressed as a word problem.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_web_of_lies"
"doc_to_target": target
"doc_to_choice": ["Yes", "No"]
\ No newline at end of file
group: bbh_alt_pv_03_zeroshot
dataset_path: lukaemon/bbh
output_type: multiple_choice
test_split: test
doc_to_text: !function ../../styles.styles_03
doc_to_target: !function ../../styles.doc_to_target
doc_to_choice: !function ../../styles.doc_to_choice
num_fewshot: 0
metric_list:
- metric: acc
- metric: acc_norm
- metric: brier_score
\ No newline at end of file
"dataset_name": "boolean_expressions"
"description": "Evaluate the result of a random Boolean expression.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_boolean_expressions"
"doc_to_target": target
"doc_to_choice": ["True", "False"]
"dataset_name": "causal_judgement"
"description": "Answer questions about causal attribution.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_causal_judgement"
"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_date_understanding"
"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_disambiguation_qa"
"dataset_name": "formal_fallacies"
"description": "Distinguish deductively valid arguments from formal fallacies.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_formal_fallacies"
"doc_to_target": target
"doc_to_choice": ["valid", "invalid"]
\ No newline at end of file
"dataset_name": "geometric_shapes"
"description": "Name geometric shapes from their SVG paths.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_geometric_shapes"
"dataset_name": "hyperbaton"
"description": "Order adjectives correctly in English sentences.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_hyperbaton"
"dataset_name": "logical_deduction_five_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_logical_deduction_five_objects"
"dataset_name": "logical_deduction_seven_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_03_zeroshot_logical_deduction_seven_objects"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment