Unverified Commit 9822b06e authored by Lintang Sutawika's avatar Lintang Sutawika Committed by GitHub
Browse files

Merge branch 'main' into weight_by_size

parents 51f27158 b177c82c
"dataset_name": "web_of_lies"
"description": "Evaluate a random boolean function expressed as a word problem.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step.\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_web_of_lies"
filter_list:
- name: "flexible-extract"
filter:
- function: !function utils.MapRegexFilter
group_select: -1
ignore_case: true
regex_pattern_to_value:
\b(no|does not tell the truth|is not telling the truth)\b: "no"
\b(yes|tells the truth|is telling the truth)\b: "yes"
- function: "take_first"
- name: "strict-match"
filter:
- function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first"
"dataset_name": "word_sorting"
"description": "Sort a list of words.\n\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step.\n"
"doc_to_text": "Q: {{input}}\nA: Let's think step by step."
"include": "_cot_zeroshot_template_yaml"
"task": "bbh_cot_zeroshot_word_sorting"
filter_list:
- name: "flexible-extract"
filter:
- function: !function utils.WordSortFilter
- function: "take_first"
- name: "strict-match"
filter:
- function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first"
......@@ -19,3 +19,4 @@ generation_kwargs:
num_fewshot: 0
metadata:
version: 1.0
num_fewshot: 3 # will be printed in results table
......@@ -7,15 +7,21 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
# ignore_case: true
ignore_case: true
# ignore_punctuation: true
regexes_to_ignore:
- "\\.$"
- ","
- "\n"
- "\\\\"
- '"'
generation_kwargs:
until:
- "</s>"
- "Q:"
- "\n\n"
- "<|im_end|>"
do_sample: false
temperature: 0.0
num_fewshot: 0
metadata:
version: 1.0
version: 2.0
......@@ -3,3 +3,14 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_boolean_expressions"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "regex"
group_select: 0
regex_pattern: "\\b(True|False)\\b"
- function: "take_first"
......@@ -3,3 +3,14 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_causal_judgement"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "regex"
group_select: 0
regex_pattern: "\\b(Yes|No|yes|no)\\b"
- function: "take_first"
......@@ -3,3 +3,16 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_date_understanding"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,16 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_disambiguation_qa"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,13 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_dyck_languages"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "regex"
group_select: 0
regex_pattern: "(?<= )([\" \\[\\(<{}>\\)\\]]+)|([\" \\[\\(<{}>\\)\\]]+)"
- function: "take_first"
......@@ -3,3 +3,14 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_formal_fallacies"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "regex"
group_select: 0
regex_pattern: "\\b(valid|invalid)\\b"
- function: "take_first"
......@@ -3,3 +3,16 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_geometric_shapes"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,16 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_hyperbaton"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,15 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_five_objects"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,15 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_seven_objects"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,15 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_logical_deduction_three_objects"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,15 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_movie_recommendation"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
......@@ -3,3 +3,14 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_multistep_arithmetic_two"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.NumberParseRegexFilter
group_select: 0
regex_pattern: "([-0-9]+)"
- function: "take_first"
......@@ -3,3 +3,13 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_navigate"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: "regex"
group_select: 0
regex_pattern: "\\b(Yes|No|yes|no)\\b"
- function: "take_first"
......@@ -3,3 +3,13 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_object_counting"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.NumberParseRegexFilter
group_select: 0
regex_pattern: "([-0-9]+)"
- function: "take_first"
......@@ -3,3 +3,15 @@
"doc_to_text": "Q: {{input}}\nA:"
"include": "_zeroshot_template_yaml"
"task": "bbh_zeroshot_penguins_in_a_table"
filter_list:
- name: "strict-match"
filter:
- function: "take_first"
- name: "flexible-extract"
filter:
- function: !function utils.MultiChoiceRegexFilter
group_select: 0
ignore_case: true
ignore_punctuation: true
regex_pattern: "(\\([A-Z]\\))"
- function: "take_first"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment