Commit 12b6eeb5 (unverified), authored by Ramiro R. C., committed by GitHub
Browse files

fixed mmlu generative response extraction (#2503)



* fixed mmlu generative response extraction

* updated file version | added args to exact_match

* fix

* fix

* pre-commit

* fix groups

---------
Co-authored-by: Baber <baber@hey.com>
parent 88144079
...@@ -13,46 +13,48 @@ from tqdm import tqdm ...@@ -13,46 +13,48 @@ from tqdm import tqdm
eval_logger = logging.getLogger("lm-eval") eval_logger = logging.getLogger("lm-eval")
SUBJECTS = {'Islamic Studies': 'humanities', SUBJECTS = {
'Driving Test': 'other', "Islamic Studies": "humanities",
'Natural Science (Middle School)': 'stem', "Driving Test": "other",
'Natural Science (Primary School)': 'stem', "Natural Science (Middle School)": "stem",
'History (Primary School)': 'humanities', "Natural Science (Primary School)": "stem",
'History (Middle School)': 'humanities', "History (Primary School)": "humanities",
'History (High School)': 'humanities', "History (Middle School)": "humanities",
'General Knowledge': 'other', "History (High School)": "humanities",
'General Knowledge (Primary School)': 'other', "General Knowledge": "other",
'General Knowledge (Middle School)': 'other', "General Knowledge (Primary School)": "other",
'Law (Professional)': 'humanities', "General Knowledge (Middle School)": "other",
'Physics (High School)': 'stem', "Law (Professional)": "humanities",
'Social Science (Middle School)': 'social_science', "Physics (High School)": "stem",
'Social Science (Primary School)': 'social_science', "Social Science (Middle School)": "social_science",
'Management (University)': 'other', "Social Science (Primary School)": "social_science",
'Arabic Language (Primary School)': 'language', "Management (University)": "other",
'Arabic Language (Middle School)': 'language', "Arabic Language (Primary School)": "language",
'Arabic Language (High School)': 'language', "Arabic Language (Middle School)": "language",
'Political Science (University)': 'social_science', "Arabic Language (High School)": "language",
'Philosophy (High School)': 'humanities', "Political Science (University)": "social_science",
'Accounting (University)': 'social_science', "Philosophy (High School)": "humanities",
'Computer Science (University)': 'stem', "Accounting (University)": "social_science",
'Computer Science (Middle School)': 'stem', "Computer Science (University)": "stem",
'Computer Science (Primary School)': 'stem', "Computer Science (Middle School)": "stem",
'Computer Science (High School)': 'stem', "Computer Science (Primary School)": "stem",
'Geography (Primary School)': 'social_science', "Computer Science (High School)": "stem",
'Geography (Middle School)': 'social_science', "Geography (Primary School)": "social_science",
'Geography (High School)': 'social_science', "Geography (Middle School)": "social_science",
'Math (Primary School)': 'stem', "Geography (High School)": "social_science",
'Biology (High School)': 'stem', "Math (Primary School)": "stem",
'Economics (University)': 'social_science', "Biology (High School)": "stem",
'Economics (Middle School)': 'social_science', "Economics (University)": "social_science",
'Economics (High School)': 'social_science', "Economics (Middle School)": "social_science",
'Arabic Language (General)': 'language', "Economics (High School)": "social_science",
'Arabic Language (Grammar)': 'language', "Arabic Language (General)": "language",
'Islamic Studies (High School)': 'humanities', "Arabic Language (Grammar)": "language",
'Islamic Studies (Middle School)': 'humanities', "Islamic Studies (High School)": "humanities",
'Islamic Studies (Primary School)': 'humanities', "Islamic Studies (Middle School)": "humanities",
'Civics (Middle School)': 'social_science', "Islamic Studies (Primary School)": "humanities",
'Civics (High School)': 'social_science'} "Civics (Middle School)": "social_science",
"Civics (High School)": "social_science",
}
def parse_args(): def parse_args():
......
# noqa
""" """
Take in a YAML, and output all "other" splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
......
...@@ -14,7 +14,21 @@ metric_list: ...@@ -14,7 +14,21 @@ metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
ignore_punctuation: true
ignore_case: true
filter_list:
- name: get_response
filter:
# Filter everything after the first break line
- function: "regex"
regex_pattern: "^(.*?)(?=\\n|$)"
# Remove leading white spaces
- function: remove_whitespace
# function to ignore right white spaces or line breaks
- function: "regex"
regex_pattern: "^(.*?)\\s*$"
- function: take_first
metadata: metadata:
version: 2.0 version: 3.0
dataset_kwargs: dataset_kwargs:
trust_remote_code: true trust_remote_code: true
...@@ -5,29 +5,29 @@ task: ...@@ -5,29 +5,29 @@ task:
task: task:
- mmlu_stem_generative - mmlu_stem_generative
aggregate_metric_list: aggregate_metric_list:
- metric: acc - metric: exact_match
weight_by_size: True weight_by_size: true
- group: other - group: other
task: task:
- mmlu_other_generative - mmlu_other_generative
aggregate_metric_list: aggregate_metric_list:
- metric: acc - metric: exact_match
weight_by_size: True weight_by_size: true
- group: social sciences - group: social sciences
task: task:
- mmlu_social_sciences_generative - mmlu_social_sciences_generative
aggregate_metric_list: aggregate_metric_list:
- metric: acc - metric: exact_match
weight_by_size: True weight_by_size: true
- group: humanities - group: humanities
task: task:
- mmlu_humanities_generative - mmlu_humanities_generative
aggregate_metric_list: aggregate_metric_list:
- metric: acc - metric: exact_match
weight_by_size: True weight_by_size: true
aggregate_metric_list: aggregate_metric_list:
- aggregation: mean - aggregation: mean
metric: exact_match metric: exact_match
weight_by_size: True weight_by_size: true
metadata: metadata:
version: 2 version: 3
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment