resolved conflict

c0d5a660 · lintangsutawika · f7b81bd4 · 0d1ef037 · c0d5a660 · c0d5a660
Commit c0d5a660 authored Jan 17, 2024 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml
+"dataset_name": "penguins_in_a_table"
+"description": "Answer questions about a table of penguins and their attributes.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_penguins_in_a_table"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml
+"dataset_name": "reasoning_about_colored_objects"
+"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_reasoning_about_colored_objects"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml
+"dataset_name": "ruin_names"
+"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_ruin_names"
+"process_docs": !function ../../utils.fix_ruin_names
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml
+"dataset_name": "salient_translation_error_detection"
+"description": "Detect the type of error in an English translation of a German source sentence.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_salient_translation_error_detection"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml
+"dataset_name": "snarks"
+"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_snarks"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml
+"dataset_name": "sports_understanding"
+"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_sports_understanding"
+"doc_to_target": target
+"doc_to_choice": ["yes", "no"]
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml
+"dataset_name": "temporal_sequences"
+"description": "Task description: Answer questions about which times certain events could have occurred.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_temporal_sequences"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml
+"dataset_name": "tracking_shuffled_objects_five_objects"
+"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_five_objects"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml
+"dataset_name": "tracking_shuffled_objects_seven_objects"
+"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_seven_objects"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml
+"dataset_name": "tracking_shuffled_objects_three_objects"
+"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_three_objects"
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml
+"dataset_name": "web_of_lies"
+"description": "Evaluate a random boolean function expressed as a word problem.\n\n"
+"include": "_zeroshot_template_yaml"
+"task": "bbh_alt_pv_03_zeroshot_web_of_lies"
+"doc_to_target": target
+"doc_to_choice": ["Yes", "No"]
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py
+import re
+import string
+
+# Choice list returned for Yes/No questions; doc_to_target indexes into it.
+yes_no = ["Yes", "No"]
+
+
+def parse_choices(doc):
+    """Extract the answer choices listed after "Options:" in ``doc["input"]``.
+
+    Returns ``yes_no`` when the options text contains both "- Yes" and
+    "- No". Otherwise returns the text of every "(X) ..." option with the
+    four-character "(X) " prefix and the trailing newline removed. Returns an
+    empty list unless the input contains exactly one "Options:" marker.
+    """
+    segments = doc["input"].split("Options:")
+    # Zero or several "Options:" markers: nothing is parsed.
+    if len(segments) != 2:
+        return []
+
+    options_text = segments[1]
+    if "- Yes" in options_text and "- No" in options_text:
+        return yes_no
+
+    option_pattern = r"\([A-Z]\) .*?\n|\([A-Z]\) .*?$"
+    return [
+        option[4:].rstrip("\n")
+        for option in re.findall(option_pattern, options_text)
+    ]
+
+
+def styles_01(doc):
+    """Return the question text with any parsed "Options:" section removed.
+
+    When ``parse_choices`` finds choices, everything from "Options:" onwards
+    is dropped and a single trailing newline or space is trimmed from what
+    remains. When no choices are found the input is returned unchanged.
+    """
+    # Keep the input untouched when no choices could be parsed from it.
+    if not parse_choices(doc):
+        return doc["input"]
+
+    doc_to_text = doc["input"].split("Options:")[0]
+    # endswith() is safe when the input starts with "Options:" and the prefix
+    # is empty, where indexing [-1] would raise IndexError.
+    if doc_to_text.endswith(("\n", " ")):
+        doc_to_text = doc_to_text[:-1]
+    return doc_to_text
+
+
+def styles_02(doc):
+    """Format the option-free question from ``styles_01`` as "Q: ...\\nA:"."""
+    question = styles_01(doc)
+    return f"Q: {question}\nA:"
+
+
+def styles_03(doc):
+    """Format the option-free question from ``styles_01`` as "Question: ...\\nAnswer:"."""
+    question = styles_01(doc)
+    return f"Question: {question}\nAnswer:"
+
+
+def doc_to_choice(doc):
+    """Return the answer choices for ``doc`` as produced by ``parse_choices``."""
+    choices = parse_choices(doc)
+    return choices
+
+
+def doc_to_target(doc):
+    """Map a document's gold answer onto its index in the choice list.
+
+    "Yes"/"No" targets are looked up in ``yes_no``. Any other target is
+    expected to be a single capital letter wrapped in one character on each
+    side, such as "(B)", and is mapped to that letter's position in the
+    alphabet ("(A)" -> 0).
+
+    Raises:
+        ValueError: if the target is neither "Yes"/"No" nor a single wrapped
+            capital letter. The message carries the offending target and the
+            full document so the bad row can be located.
+    """
+    target = doc["target"]
+    if target in ["Yes", "No"]:
+        return yes_no.index(target)
+
+    letter = target[1:-1] if isinstance(target, str) else ""
+    # str.index() matches substrings, so "" or "AB" would silently map to 0;
+    # require exactly one capital letter before looking it up.
+    if len(letter) == 1 and letter in string.ascii_uppercase:
+        return string.ascii_uppercase.index(letter)
+
+    # Fail loudly instead of printing and exiting with a success status.
+    raise ValueError(
+        f"Cannot convert target {target!r} to a choice index for doc: {doc!r}"
+    )
--- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py
+++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py
+# For fixing line 163 in `movie_recommendation`
+
+
+def fix_movie_recommendation(data):
+    """Repair the one malformed `movie_recommendation` row in ``data``.
+
+    The row whose target is the raw answer text "Monsters, Inc" gets a
+    rewritten input with a proper option list and the letter target "(A)".
+    Every other row is passed through unchanged. Returns ``data.map(...)``.
+    """
+    repaired_input = "Find a movie similar to Minority Report, Shrek, Catch Me If You Can, Aladdin:\nOptions:\n(A) Monsters, Inc\n(B) Children of the Night\n(C) The Incredible Shrinking Man\n(D) Town & Country"
+
+    def _fix(doc):
+        # Only the row with the un-lettered target needs patching.
+        if doc["target"] != "Monsters, Inc":
+            return doc
+        doc["input"] = repaired_input
+        doc["target"] = "(A)"
+        return doc
+
+    return data.map(_fix)
+
+
+def fix_ruin_names(data):
+    """Repair the two malformed `ruin_names` rows in ``data``.
+
+    Rows whose target is the raw answer text instead of an option letter get
+    a rewritten input with a proper option list and the matching letter
+    target. Every other row is passed through unchanged. Returns
+    ``data.map(...)``.
+    """
+    # (raw target to look for, replacement input, replacement target)
+    repairs = (
+        (
+            "dearth, wind, & fire",
+            "Which of the following is a humorous edit of this artist or movie name: 'earth, wind, & fire'?\nOptions:\n(A) eareth, wind, & fire\n(B) earth, bind, & fire\n(C) earthm wind, & fire\n(D) dearth, wind, & fire",
+            "(D)",
+        ),
+        (
+            "rita, sue and bob poo",
+            "Which of the following is a humorous edit of this artist or movie name: 'rita, sue and bob too'?\nOptions:\n(A) rita, sue and bob too\n(B) rita, sue and bob poo\n(C) rita, sue and box too\n(D) rita,y sue and bob too",
+            "(B)",
+        ),
+    )
+
+    def _fix(doc):
+        for raw_target, new_input, new_target in repairs:
+            if doc["target"] == raw_target:
+                doc["input"] = new_input
+                doc["target"] = new_target
+                # At most one repair applies per row.
+                break
+        return doc
+
+    return data.map(_fix)
--- a/lm_eval/tasks/bigbench/alternative_worlds/aux_metric.py
+++ b/lm_eval/tasks/bigbench/alternative_worlds/aux_metric.py
+from textdistance import levenshtein
+from transformers import AutoTokenizer
+
+# Change this tokenizer to fit with the model you are using.
+# NOTE(review): this is created at import time, so importing the module
+# triggers the tokenizer load. `max_new_tokens` looks like a generation
+# setting rather than a tokenizer option — confirm it is intended here.
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b", max_new_tokens=128)
+
+
+def token_edit_distance(references, predictions, **kwargs):
+    """Return the token-level Levenshtein distance for one example.
+
+    The first reference and the first prediction are each encoded with the
+    module-level ``tokenizer``, and the edit distance between the two token
+    sequences is returned. Only index 0 of ``references`` and ``predictions``
+    is used; extra keyword arguments are accepted and ignored.
+    """
+    ref_tokens = tokenizer.encode(references[0])
+    pred_tokens = tokenizer.encode(predictions[0])
+    return levenshtein.distance(ref_tokens, pred_tokens)
--- a/lm_eval/tasks/hellaswag/alternative_worlds/README.md
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/README.md
+
+
+Investigate the effect of letter option formats:
+- (A)
+- A)
+- A.
+- A\t
+- (a)
+- a)
+- a.
+- a\t
+
+Answer types:
+- letters only
+    - original option
+    - just letter
+- letters + continuation
+    - original option
+    - just letter
+- continuation
--- a/lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml
+dataset_path: Rowan/hellaswag
+dataset_name: null
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+test_split: null
+process_docs: !function ../utils.process_docs
+doc_to_text: "{{query}}"
+doc_to_target: "{{label}}"
+doc_to_choice: "{{choices}}"
+metric_list:
+  - metric: acc
+    aggregation: mean
+    higher_is_better: true
+  - metric: acc_norm
+    aggregation: mean
+    higher_is_better: true
+  - metric: brier_score
+    higher_is_better: false
--- a/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml
+group: hellaswag_alt
+task:
+  - hellaswag_01
+  - hellaswag_02
+  - hellaswag_03
+  - hellaswag_04
+  - hellaswag_05
+  - hellaswag_06
+  - hellaswag_07
+  - hellaswag_08
--- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml
+include: ../_hellaswag_alt_yaml
+group: hellaswag_01
+group_alias: style_01
+task: hellaswag_01a
+task_alias: a
+doc_to_text: !function ../styles.template_01
+doc_to_choice: !function ../styles.choice_01a
--- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml
+include: ../_hellaswag_alt_yaml
+group: hellaswag_01
+group_alias: style_01
+task: hellaswag_01b
+task_alias: b
+doc_to_text: !function ../styles.template_01
+doc_to_choice: !function ../styles.choice_01b
--- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml
+++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml
+include: ../_hellaswag_alt_yaml
+group: hellaswag_01
+group_alias: style_01
+task: hellaswag_01c
+task_alias: c
+doc_to_text: !function ../styles.template_01
+doc_to_choice: !function ../styles.choice_01c