gaoqiong / lm-evaluation-harness / Commits / bbb8386c

Commit bbb8386c authored Apr 16, 2024 by lintangsutawika

removed alt worlds prompts

parent 3e5e9da2
Changes (1000)

Showing 20 changed files with 0 additions and 164 deletions (+0 -164)
lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml   +0 -15
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml   +0 -4
lm_eval/tasks/arithmetic/alternative_worlds/utils.py   +0 -36
lm_eval/tasks/bbh/alternative_worlds/README.md   +0 -34
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml   +0 -5
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml   +0 -12
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml   +0 -6
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml   +0 -4
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml   +0 -4
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml   +0 -4
Too many changes to show. To preserve performance, only 1000 of 1000+ files are displayed.
lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml deleted 100644 → 0

include: _template_04_yaml
task: arithmetic_5ds_alt_04
dataset_name: arithmetic_5ds
task_alias: 5ds
lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml deleted 100644 → 0

include: ../_template_yaml
group: arithmetic_alt_05
group_alias: arithmetic (Style 05)
dataset_path: EleutherAI/arithmetic
output_type: loglikelihood
validation_split: validation
test_split: null
doc_to_text: !function ../utils.style_05
doc_to_target: "{{completion}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: brier_score
    higher_is_better: false
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_1dc_alt_05
dataset_name: arithmetic_1dc
task_alias: 1dc
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_2da_alt_05
dataset_name: arithmetic_2da
task_alias: 2da
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_2dm_alt_05
dataset_name: arithmetic_2dm
task_alias: 2dm
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_2ds_alt_05
dataset_name: arithmetic_2ds
task_alias: 2ds
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_3da_alt_05
dataset_name: arithmetic_3da
task_alias: 3da
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_3ds_alt_05
dataset_name: arithmetic_3ds
task_alias: 3ds
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_4da_alt_05
dataset_name: arithmetic_4da
task_alias: 4da
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_4ds_alt_05
dataset_name: arithmetic_4ds
task_alias: 4ds
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_5da_alt_05
dataset_name: arithmetic_5da
task_alias: 5da
lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml deleted 100644 → 0

include: _template_05_yaml
task: arithmetic_5ds_alt_05
dataset_name: arithmetic_5ds
task_alias: 5ds
lm_eval/tasks/arithmetic/alternative_worlds/utils.py deleted 100644 → 0

import re


# Original Prompt
# Question: What is (9 + 8) * 2? Answer:
def style_00(docs):
    # What is (9 + 8) * 2?
    return docs["context"]


def style_01(docs):
    # What is (9 + 8) * 2?
    return docs["context"].replace("Question: ", "").replace(" Answer:", "")


def style_02(docs):
    # Q: What is (9 + 8) * 2? A:
    return docs["context"].replace("Question: ", "Q: ").replace(" Answer:", " A:")


def style_03(docs):
    # Solve (9 + 8) * 2.
    return (
        docs["context"].replace("Question: What is", "Solve").replace(" Answer:", ".")
    )


def style_04(docs):
    # (9 + 8) * 2 =
    return docs["context"].replace("Question: What is ", "").replace(" Answer:", " =")


def style_05(docs):
    # What is (9 + 8) * 2? Answer:
    return docs["context"].replace("Question: ", "")
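For orientation, the deleted helpers above rewrote the harness's arithmetic prompt string into alternative phrasings. A minimal usage sketch (illustrative, not part of this commit), assuming the functions above are in scope and that each doc carries a "context" string shaped like the original prompt:

# Illustrative only: a fake doc shaped like the EleutherAI/arithmetic "context" field.
doc = {"context": "Question: What is (9 + 8) * 2? Answer:"}

print(style_01(doc))  # What is (9 + 8) * 2?
print(style_02(doc))  # Q: What is (9 + 8) * 2? A:
print(style_05(doc))  # What is (9 + 8) * 2? Answer: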
lm_eval/tasks/bbh/alternative_worlds/README.md deleted 100644 → 0
| Task | Prompt Variation | Output Variation | Option in Sample |
| :-----------------:| :---------------: | :---------------: |:---------------: |
| boolean_expression | Yes | Yes | No |
| causal_judgement | Yes | Yes | Yes |
| date_understanding | Yes | Yes | Yes |
| disambiguation_qa | Yes | Yes | Yes |
| dyck_languages | Yes | No | No |
| formal_fallacies | Yes | Yes | Yes |
| geometric_shapes | Yes | Yes | Yes |
| hyperbaton | Yes | Yes | Yes |
| logical_deduction_five_objects| Yes | Yes | Yes |
| logical_deduction_seven_objects| Yes | Yes | Yes |
| logical_deduction_three_objects| Yes | Yes | Yes |
| movie_recommendation| Yes | Yes | Yes |
| multistep_arithmetic_two| Yes | No | No |
| navigate | Yes | Yes | Yes |
| object_counting | Yes | No | No |
| penguins_in_a_table| Yes | Yes | Yes |
| reasoning_about_colored_objects| Yes | Yes | Yes |
| ruin_names | Yes | Yes | Yes |
| salient_translation_error_detection| Yes| Yes | Yes |
| snarks | Yes | Yes | Yes |
| sports_understanding| Yes | Yes | No |
| temporal_sequences | Yes | Yes | Yes |
| tracking_shuffled_objects_five_objects| Yes| Yes | Yes |
| tracking_shuffled_objects_seven_objects| Yes| Yes | Yes |
| tracking_shuffled_objects_three_objects| Yes| Yes | Yes |
| web_of_lies | Yes | Yes | No |
| word_sorting | Yes | No | No |
Notes:
- `web_of_lies` already starts with `Question: `
- Tasks with options are `Options: (A) ...` (multiple choice) or `Options: - ...` (binary choice)
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml deleted 100644 → 0

group: bbh_alt_pv_zeroshot
task:
  - bbh_alt_pv_01_zeroshot
  - bbh_alt_pv_02_zeroshot
  - bbh_alt_pv_03_zeroshot
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml deleted 100644 → 0

group: bbh_alt_pv_01_zeroshot
dataset_path: lukaemon/bbh
output_type: multiple_choice
test_split: test
doc_to_text: !function ../../styles.styles_01
doc_to_target: !function ../../styles.doc_to_target
doc_to_choice: !function ../../styles.doc_to_choice
num_fewshot: 0
metric_list:
  - metric: acc
  - metric: acc_norm
  - metric: brier_score
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml deleted 100644 → 0

"dataset_name": "boolean_expressions"
"description": "Evaluate the result of a random Boolean expression.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_boolean_expressions"
"doc_to_target": target
"doc_to_choice": ["True", "False"]
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml deleted 100644 → 0

"dataset_name": "causal_judgement"
"description": "Answer questions about causal attribution.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_causal_judgement"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml deleted 100644 → 0

"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_date_understanding"
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml deleted 100644 → 0

"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_disambiguation_qa"