gaoqiong / lm-evaluation-harness · Commits

Commit 66421b57
authored Dec 08, 2023 by lintangsutawika
add prompt variation
parent 55eff889
Changes: 76 files total; showing 20 changed files with 97 additions and 0 deletions (+97 −0).
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml (+6 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml (+6 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml (+12 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml (+6 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml (+6 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml (+5 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml (+4 −0)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml (new file, mode 100644)

"dataset_name": "sports_understanding"
"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_sports_understanding"
"doc_to_target": target
"doc_to_choice": ["yes", "no"]
\ No newline at end of file
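The `doc_to_choice` list above, combined with the template's `output_type: multiple_choice`, means each answer string ("yes" / "no") is scored by model log-likelihood and the highest-scoring choice is the prediction. A minimal sketch of that selection step, with illustrative names rather than the harness's actual internals:

```python
# Hedged sketch: accuracy for one multiple-choice document.
# loglikelihoods holds log P(choice | prompt) for each entry of
# doc_to_choice; gold_index points at the correct choice.

def score_multiple_choice(loglikelihoods, gold_index):
    """Return 1.0 if the argmax choice matches the gold choice, else 0.0."""
    pred = max(range(len(loglikelihoods)), key=lambda i: loglikelihoods[i])
    return 1.0 if pred == gold_index else 0.0

# Example: "yes" (index 0) is gold and scores higher than "no".
acc = score_multiple_choice([-1.2, -2.5], gold_index=0)
```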
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml (new file, mode 100644)

"dataset_name": "temporal_sequences"
"description": "Task description: Answer questions about which times certain events could have occurred.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_temporal_sequences"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml (new file, mode 100644)

"dataset_name": "tracking_shuffled_objects_five_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_five_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml (new file, mode 100644)

"dataset_name": "tracking_shuffled_objects_seven_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_seven_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml (new file, mode 100644)

"dataset_name": "tracking_shuffled_objects_three_objects"
"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_three_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml (new file, mode 100644)

"dataset_name": "web_of_lies"
"description": "Evaluate a random boolean function expressed as a word problem.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_web_of_lies"
"doc_to_target": target
"doc_to_choice": ["Yes", "No"]
\ No newline at end of file
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml (new file, mode 100644)

group: bbh_alt_pv_02_zeroshot
dataset_path: lukaemon/bbh
output_type: multiple_choice
test_split: test
doc_to_text: !function ../../styles.styles_02
doc_to_target: !function ../../styles.doc_to_target
doc_to_choice: !function ../../styles.doc_to_choice
num_fewshot: 0
metric_list:
  - metric: acc
  - metric: acc_norm
  - metric: brier_score
\ No newline at end of file
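The per-task YAMLs reference this file via `include: _zeroshot_template_yaml`: the template supplies shared defaults, and task-local keys take precedence. A rough dict-merge sketch of that resolution (a hypothetical loader, not the harness's actual code):

```python
# Hedged sketch of how `include:` resolves for these configs.
# Template keys are defaults; keys in the task file override them.

template = {
    "group": "bbh_alt_pv_02_zeroshot",
    "dataset_path": "lukaemon/bbh",
    "output_type": "multiple_choice",
    "test_split": "test",
    "num_fewshot": 0,
}

task_yaml = {
    "dataset_name": "boolean_expressions",
    "include": "_zeroshot_template_yaml",
    "task": "bbh_alt_pv_02_zeroshot_boolean_expressions",
    "doc_to_choice": ["True", "False"],
}

def resolve_include(task_cfg, template_cfg):
    """Merge template defaults under task-local keys (task wins)."""
    merged = dict(template_cfg)
    merged.update({k: v for k, v in task_cfg.items() if k != "include"})
    return merged

config = resolve_include(task_yaml, template)
# config["dataset_path"] comes from the template;
# config["dataset_name"] comes from the task file.
```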
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml (new file, mode 100644)

"dataset_name": "boolean_expressions"
"description": "Evaluate the result of a random Boolean expression.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_boolean_expressions"
"doc_to_target": target
"doc_to_choice": ["True", "False"]
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml (new file, mode 100644)

"dataset_name": "causal_judgement"
"description": "Answer questions about causal attribution.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_causal_judgement"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml (new file, mode 100644)

"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_date_understanding"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml (new file, mode 100644)

"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_disambiguation_qa"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml (new file, mode 100644)

"dataset_name": "formal_fallacies"
"description": "Distinguish deductively valid arguments from formal fallacies.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_formal_fallacies"
"doc_to_target": target
"doc_to_choice": ["valid", "invalid"]
\ No newline at end of file
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml (new file, mode 100644)

"dataset_name": "geometric_shapes"
"description": "Name geometric shapes from their SVG paths.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_geometric_shapes"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml (new file, mode 100644)

"dataset_name": "hyperbaton"
"description": "Order adjectives correctly in English sentences.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_hyperbaton"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml (new file, mode 100644)

"dataset_name": "logical_deduction_five_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_logical_deduction_five_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml (new file, mode 100644)

"dataset_name": "logical_deduction_seven_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_logical_deduction_seven_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml (new file, mode 100644)

"dataset_name": "logical_deduction_three_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_logical_deduction_three_objects"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml (new file, mode 100644)

"dataset_name": "movie_recommendation"
"description": "Recommend movies similar to the given list of movies.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_movie_recommendation"
"process_docs": !function ../utils.fix_movie_recommendation
\ No newline at end of file
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml (new file, mode 100644)

"dataset_name": "navigate"
"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_navigate"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml (new file, mode 100644)

"dataset_name": "penguins_in_a_table"
"description": "Answer questions about a table of penguins and their attributes.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_02_zeroshot_penguins_in_a_table"
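The template's `metric_list` includes `brier_score` alongside `acc` and `acc_norm`. A plausible reading of that metric for a multiple-choice task: squared error between the softmax over per-choice log-likelihoods and the one-hot gold label. The sketch below is illustrative only; the harness's exact normalization may differ.

```python
# Hedged sketch of a Brier score over doc_to_choice probabilities.
import math

def brier_score(loglikelihoods, gold_index):
    """Sum of squared differences between softmax probs and one-hot gold."""
    exps = [math.exp(ll) for ll in loglikelihoods]
    total = sum(exps)
    probs = [e / total for e in exps]
    return sum(
        (p - (1.0 if i == gold_index else 0.0)) ** 2
        for i, p in enumerate(probs)
    )

# A confident correct prediction yields a score near 0;
# a confident wrong one approaches 2 for two choices.
score = brier_score([0.0, -5.0], gold_index=0)
```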