Commit 5a4fc8fb authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into port_master_squadv2
parents 51753750 e35b0667
...@@ -559,8 +559,6 @@ def evaluate( ...@@ -559,8 +559,6 @@ def evaluate(
task_hierarchy, task_order, versions, task_group_alias task_hierarchy, task_order, versions, task_group_alias
) )
_results_agg = collections.defaultdict(dict)
_versions = collections.defaultdict(dict)
for task in results_agg: for task in results_agg:
task_results = results_agg[task] task_results = results_agg[task]
...@@ -574,15 +572,10 @@ def evaluate( ...@@ -574,15 +572,10 @@ def evaluate(
if task in task_group_alias: if task in task_group_alias:
task_alias = task_group_alias[task] task_alias = task_group_alias[task]
_results_agg[tab_string + task_alias] = task_results results_agg[task]["alias"] = tab_string + task_alias
_versions[tab_string + task_alias] = versions[task]
else: else:
_results_agg[tab_string + task] = task_results results_agg[task]["alias"] = tab_string + task
_versions[tab_string + task] = versions[task]
results_agg = _results_agg
versions = _versions
_groups_agg = collections.defaultdict(dict)
for group in groups_agg: for group in groups_agg:
group_results = groups_agg[group] group_results = groups_agg[group]
...@@ -596,10 +589,9 @@ def evaluate( ...@@ -596,10 +589,9 @@ def evaluate(
if group in task_group_alias: if group in task_group_alias:
group_alias = task_group_alias[group] group_alias = task_group_alias[group]
_groups_agg[tab_string + group_alias] = group_results groups_agg[group]["alias"] = tab_string + group_alias
else: else:
_groups_agg[tab_string + group] = group_results groups_agg[group]["alias"] = tab_string + group
groups_agg = _groups_agg
results_dict = { results_dict = {
"results": dict(results_agg.items()), "results": dict(results_agg.items()),
......
group: mmlu
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
......
group: mmlu_flan_cot_fewshot
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation validation_split: validation
fewshot_split: dev fewshot_split: dev
......
group: mmlu_flan_cot_zeroshot
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation validation_split: validation
fewshot_split: dev fewshot_split: dev
......
group: mmlu_flan_n_shot_generative group: mmlu_flan_n_shot_loglikelihood
task: task:
- mmlu_flan_n_shot_generative_stem - mmlu_flan_n_shot_loglikelihood_stem
- mmlu_flan_n_shot_generative_other - mmlu_flan_n_shot_loglikelihood_other
- mmlu_flan_n_shot_generative_social_sciences - mmlu_flan_n_shot_loglikelihood_social_sciences
- mmlu_flan_n_shot_generative_humanities - mmlu_flan_n_shot_loglikelihood_humanities
group: mmlu_flan_n_shot_loglikelihood
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test test_split: test
fewshot_split: dev fewshot_split: dev
......
"dataset_name": "abstract_algebra" "dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\ "description": "The following are multiple choice questions (with answers) about abstract\
\ algebra.\n\n" \ algebra.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_abstract_algebra" "task": "mmlu_flan_n_shot_loglikelihood_abstract_algebra"
"dataset_name": "anatomy" "dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\ "description": "The following are multiple choice questions (with answers) about anatomy.\n\
\n" \n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_anatomy" "task": "mmlu_flan_n_shot_loglikelihood_anatomy"
"dataset_name": "astronomy" "dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\ "description": "The following are multiple choice questions (with answers) about astronomy.\n\
\n" \n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_astronomy" "task": "mmlu_flan_n_shot_loglikelihood_astronomy"
"dataset_name": "business_ethics" "dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business\ "description": "The following are multiple choice questions (with answers) about business\
\ ethics.\n\n" \ ethics.\n\n"
"group": "mmlu_flan_n_shot_generative_other" "group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_business_ethics" "task": "mmlu_flan_n_shot_loglikelihood_business_ethics"
"dataset_name": "clinical_knowledge" "dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\ "description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge.\n\n" \ knowledge.\n\n"
"group": "mmlu_flan_n_shot_generative_other" "group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_clinical_knowledge" "task": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge"
"dataset_name": "college_biology" "dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ biology.\n\n" \ biology.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_biology" "task": "mmlu_flan_n_shot_loglikelihood_college_biology"
"dataset_name": "college_chemistry" "dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ chemistry.\n\n" \ chemistry.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_chemistry" "task": "mmlu_flan_n_shot_loglikelihood_college_chemistry"
"dataset_name": "college_computer_science" "dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ computer science.\n\n" \ computer science.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_computer_science" "task": "mmlu_flan_n_shot_loglikelihood_college_computer_science"
"dataset_name": "college_mathematics" "dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ mathematics.\n\n" \ mathematics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_mathematics" "task": "mmlu_flan_n_shot_loglikelihood_college_mathematics"
"dataset_name": "college_medicine" "dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ medicine.\n\n" \ medicine.\n\n"
"group": "mmlu_flan_n_shot_generative_other" "group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_medicine" "task": "mmlu_flan_n_shot_loglikelihood_college_medicine"
"dataset_name": "college_physics" "dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college\ "description": "The following are multiple choice questions (with answers) about college\
\ physics.\n\n" \ physics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_physics" "task": "mmlu_flan_n_shot_loglikelihood_college_physics"
"dataset_name": "computer_security" "dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer\ "description": "The following are multiple choice questions (with answers) about computer\
\ security.\n\n" \ security.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_computer_security" "task": "mmlu_flan_n_shot_loglikelihood_computer_security"
"dataset_name": "conceptual_physics" "dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\ "description": "The following are multiple choice questions (with answers) about conceptual\
\ physics.\n\n" \ physics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem" "group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_conceptual_physics" "task": "mmlu_flan_n_shot_loglikelihood_conceptual_physics"
"dataset_name": "econometrics" "dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\ "description": "The following are multiple choice questions (with answers) about econometrics.\n\
\n" \n"
"group": "mmlu_flan_n_shot_generative_social_sciences" "group": "mmlu_flan_n_shot_loglikelihood_social_sciences"
"include": "_mmlu_flan_loglikelihood_template_yaml" "include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_econometrics" "task": "mmlu_flan_n_shot_loglikelihood_econometrics"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment