OpenDAS / Pai-Megatron-Patch · Commits

Commit 5add46aa, authored Jan 09, 2025 by hepj
Commit message: Add the Megatron project (添加Megatron项目)
Parent: deb8370c
Pipeline #2199 failed with stages in 0 seconds
Changes: 1000 · Pipelines: 1
Showing 20 changed files with 368 additions and 0 deletions (+368 / -0)
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_fewshot/word_sorting.yaml  +5 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml  +27 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/boolean_expressions.yaml  +18 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/causal_judgement.yaml  +18 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/date_understanding.yaml  +20 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/disambiguation_qa.yaml  +20 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/dyck_languages.yaml  +17 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/formal_fallacies.yaml  +18 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/geometric_shapes.yaml  +20 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/hyperbaton.yaml  +20 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_five_objects.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_seven_objects.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_three_objects.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/movie_recommendation.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/multistep_arithmetic_two.yaml  +18 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/navigate.yaml  +17 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/object_counting.yaml  +17 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/penguins_in_a_table.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/reasoning_about_colored_objects.yaml  +19 -0
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/ruin_names.yaml  +19 -0
Too many changes to show: to preserve performance, only 1000 of 1000+ files are displayed.
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_fewshot/word_sorting.yaml (new file, mode 100644)
"dataset_name": "word_sorting"
"description": "Sort a list of words.\n\n"
"doc_to_text": "Q: Sort the following words alphabetically: List: oven costume counterpart\nA: Let's think step by step.\nThe first letter: \"oven\": \"o\" (15). \"costume\": \"c\" (3). \"counterpart\": \"c\" (3). We now have: (3) [\"costume\" ? \"counterpart\"] < (15) \"oven\". Now let's sort this subpart [\"costume\" ? \"counterpart\"] by looking at their second letters.\nThe second letter: \"costume\": \"o\" (15). \"counterpart\": \"o\" (15). We now have: (15) [\"costume\" ? \"counterpart\"]. Now let's sort this subpart [\"costume\" ? \"counterpart\"] by looking at their third letters.\nThe third letter: \"costume\": \"s\" (19). \"counterpart\": \"u\" (21). We now have: (19) \"costume\" < (21) \"counterpart\". Hence, we have [\"costume\" < \"counterpart\"] < \"oven\". So the answer is costume counterpart oven.\n\nQ: Sort the following words alphabetically: List: hypochlorite ponderosa phone credulity\nA: Let's think step by step.\nThe first letter: \"hypochlorite\": \"h\" (8). \"ponderosa\": \"p\" (16). \"phone\": \"p\" (16). \"credulity\": \"c\" (3). We now have: (3) \"credulity\" < (8) \"hypochlorite\" < (16) [\"ponderosa\" ? \"phone\"]. Now let's sort this subpart [\"ponderosa\" ? \"phone\"] by looking at their second letters.\nThe second letter: \"ponderosa\": \"o\" (15). \"phone\": \"h\" (8). We now have: (8) \"phone\" < (15) \"ponderosa\". Hence, we have \"credulity\" < \"hypochlorite\" < [\"phone\" < \"ponderosa\"]. So the answer is credulity hypochlorite phone ponderosa.\n\nQ: Sort the following words alphabetically: List: newt arson parthia seismography mugho aspect census\nA: Let's think step by step.\nThe first letter: \"newt\": \"n\" (14). \"arson\": \"a\" (1). \"parthia\": \"p\" (16). \"seismography\": \"s\" (19). \"mugho\": \"m\" (13). \"aspect\": \"a\" (1). \"census\": \"c\" (3). We now have: (1) [\"arson\" ? \"aspect\"] < (3) \"census\" < (13) \"mugho\" < (14) \"newt\" < (16) \"parthia\" < (19) \"seismography\". Now let's sort this subpart [\"arson\" ? \"aspect\"] by looking at their second letters.\nThe second letter: \"arson\": \"r\" (18). \"aspect\": \"s\" (19). We now have: (18) \"arson\" < (19) \"aspect\". Hence, we have [\"arson\" < \"aspect\"] < \"census\" < \"mugho\" < \"newt\" < \"parthia\" < \"seismography\". So the answer is arson aspect census mugho newt parthia seismography.\n\nQ: {{input}}\nA: Let's think step by step.\n"
"include": "_cot_fewshot_template_yaml"
"task": "bbh_cot_fewshot_word_sorting"
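
The few-shot prompt above narrates alphabetical sorting by mapping each letter to its 1-based position in the alphabet and recursing on ties. A minimal Python sketch of that procedure (not part of the harness; the word list is taken from the first exemplar) is:

# Illustration only: the letter-index comparison the prompt narrates is
# equivalent to a plain lexicographic sort of lowercase words.
def letter_index(ch: str) -> int:
    return ord(ch.lower()) - ord("a") + 1

words = ["oven", "costume", "counterpart"]
print([(w, letter_index(w[0])) for w in words])  # [('oven', 15), ('costume', 3), ('counterpart', 3)]
print(" ".join(sorted(words)))                   # costume counterpart oven
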
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/_cot_zeroshot_template_yaml (new file, mode 100644)
group: bbh_cot_zeroshot
dataset_path: lukaemon/bbh
output_type: generate_until
test_split: test
doc_to_target: "{{target}}"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    # ignore_punctuation: true
    regexes_to_ignore:
      - "\\.$"
      - ","
      - "\\\\"
      - "\n"
      - '"'
generation_kwargs:
  until:
    - "</s>"
    - "Q:"
    - "<|im_end|>"
  do_sample: false
  temperature: 0.0
num_fewshot: 0
metadata:
  version: 2.0
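
This shared template scores answers with exact_match after removing the regexes_to_ignore patterns and lowercasing (ignore_case: true). A rough Python approximation of that normalization, assuming the ignore patterns are stripped from both the filtered prediction and the target before comparison, is:

import re

# Patterns copied from regexes_to_ignore above, written as Python regex strings.
REGEXES_TO_IGNORE = [r"\.$", ",", r"\\", "\n", '"']

def normalize(text: str) -> str:
    # Remove each ignored pattern, then lowercase (ignore_case: true).
    for pattern in REGEXES_TO_IGNORE:
        text = re.sub(pattern, "", text)
    return text.lower()

def exact_match(prediction: str, target: str) -> float:
    return float(normalize(prediction) == normalize(target))

print(exact_match("True.", "True"))  # 1.0
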
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/boolean_expressions.yaml (new file, mode 100644)
"dataset_name": "boolean_expressions"
"description": "Evaluate the result of a random Boolean expression.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_boolean_expressions"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: "\\b(True|False)\\b"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
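
Each task file defines two answer-extraction filters over the generated chain of thought: flexible-extract keeps the last occurrence of a task-specific pattern (group_select: -1), while strict-match only accepts text introduced by an explicit "The answer is ..." style phrase. A small stand-alone Python illustration of both filters for boolean_expressions, run on a hypothetical model output, is:

import re

# Hypothetical model output, used purely for illustration.
output = "not (True and False) is not False, which is True. The answer is True."

# flexible-extract: last \b(True|False)\b match, then take_first.
candidates = re.findall(r"\b(True|False)\b", output)
flexible = candidates[-1] if candidates else "[invalid]"

# strict-match: the lookbehind-anchored pattern from the config.
strict_pattern = (
    r"((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)"
    r"|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
)
m = re.search(strict_pattern, output)
strict = m.group(1) if m else "[invalid]"

print(flexible, strict)  # True True
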
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/causal_judgement.yaml (new file, mode 100644)
"dataset_name": "causal_judgement"
"description": "Answer questions about causal attribution.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_causal_judgement"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: "\\b(Yes|No|yes|no)\\b"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/date_understanding.yaml (new file, mode 100644)
"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_date_understanding"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
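
For multiple-choice tasks such as date_understanding, flexible-extract uses utils.MultiChoiceRegexFilter with the pattern "(\\([A-Z]\\))" to pick out an option letter like "(B)". The snippet below illustrates only the regex step on a made-up output; the actual MultiChoiceRegexFilter in the harness does more (for example, matching an answer by its option text), so treat this as a simplified approximation:

import re

# Hypothetical model output for illustration.
output = "Counting back seven days lands on option (B). So the answer is (B)."
matches = re.findall(r"(\([A-Z]\))", output)      # regex_pattern from the config
choice = matches[-1] if matches else "[invalid]"  # group_select: -1 keeps the last match
print(choice)  # (B)
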
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/disambiguation_qa.yaml (new file, mode 100644)
"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_disambiguation_qa"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/dyck_languages.yaml (new file, mode 100644)
"dataset_name": "dyck_languages"
"description": "Correctly close a Dyck-n word.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_dyck_languages"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: "(?<= )([\"\\[\\(<{}>\\)\\]]+)|([\"\\[\\(<{}>\\)\\]]+)"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/formal_fallacies.yaml (new file, mode 100644)
"dataset_name": "formal_fallacies"
"description": "Distinguish deductively valid arguments from formal fallacies.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_formal_fallacies"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: "\\b(valid|invalid)\\b"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/geometric_shapes.yaml (new file, mode 100644)
"dataset_name": "geometric_shapes"
"description": "Name geometric shapes from their SVG paths.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_geometric_shapes"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/hyperbaton.yaml (new file, mode 100644)
"dataset_name": "hyperbaton"
"description": "Order adjectives correctly in English sentences.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_hyperbaton"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_five_objects.yaml (new file, mode 100644)
"dataset_name": "logical_deduction_five_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_logical_deduction_five_objects"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_seven_objects.yaml (new file, mode 100644)
"dataset_name": "logical_deduction_seven_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_logical_deduction_seven_objects"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/logical_deduction_three_objects.yaml (new file, mode 100644)
"dataset_name": "logical_deduction_three_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_logical_deduction_three_objects"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/movie_recommendation.yaml (new file, mode 100644)
"dataset_name": "movie_recommendation"
"description": "Recommend movies similar to the given list of movies.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_movie_recommendation"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/multistep_arithmetic_two.yaml (new file, mode 100644)
"dataset_name": "multistep_arithmetic_two"
"description": "Solve multi-step arithmetic problems.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_multistep_arithmetic_two"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.NumberParseRegexFilter
        group_select: -1
        regex_pattern: "([-0-9]+)"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
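
Arithmetic-style tasks (multistep_arithmetic_two, object_counting) use utils.NumberParseRegexFilter with the pattern "([-0-9]+)" for flexible extraction. The sketch below only reproduces the configured regex on a made-up output; the real filter is presumably more forgiving (its name suggests it also parses numbers written as words), so treat this as an illustration rather than the harness implementation:

import re

# Hypothetical model output for illustration.
output = "((-3) + 5) * 4 = 2 * 4 = 8. The answer is 8."
numbers = re.findall(r"([-0-9]+)", output)
answer = numbers[-1] if numbers else "[invalid]"  # group_select: -1 keeps the last match
print(answer)  # 8
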
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/navigate.yaml (new file, mode 100644)
"dataset_name": "navigate"
"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_navigate"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: "\\b(Yes|No|yes|no)\\b"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/object_counting.yaml (new file, mode 100644)
"dataset_name": "object_counting"
"description": "Questions that involve enumerating objects and asking the model to count them.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_object_counting"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.NumberParseRegexFilter
        group_select: -1
        regex_pattern: "([-0-9]+)"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/penguins_in_a_table.yaml (new file, mode 100644)
"dataset_name": "penguins_in_a_table"
"description": "Answer questions about a table of penguins and their attributes.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_penguins_in_a_table"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/reasoning_about_colored_objects.yaml (new file, mode 100644)
"dataset_name": "reasoning_about_colored_objects"
"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_reasoning_about_colored_objects"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"
LM-Evaluation-Harness-240310/lm_eval/tasks/bbh/cot_zeroshot/ruin_names.yaml (new file, mode 100644)
"dataset_name": "ruin_names"
"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_ruin_names"
filter_list:
  - name: "flexible-extract"
    filter:
      - function: !function utils.MultiChoiceRegexFilter
        group_select: -1
        ignore_case: true
        ignore_punctuation: true
        regex_pattern: "(\\([A-Z]\\))"
      - function: "take_first"
  - name: "strict-match"
    filter:
      - function: "regex"
        regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
      - function: "take_first"