Commit 5a4fc8fb authored by lintangsutawika
Browse files

Merge branch 'big-refactor' of...

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into port_master_squadv2
parents 51753750 e35b0667
......@@ -559,8 +559,6 @@ def evaluate(
task_hierarchy, task_order, versions, task_group_alias
)
_results_agg = collections.defaultdict(dict)
_versions = collections.defaultdict(dict)
for task in results_agg:
task_results = results_agg[task]
......@@ -574,15 +572,10 @@ def evaluate(
if task in task_group_alias:
task_alias = task_group_alias[task]
_results_agg[tab_string + task_alias] = task_results
_versions[tab_string + task_alias] = versions[task]
results_agg[task]["alias"] = tab_string + task_alias
else:
_results_agg[tab_string + task] = task_results
_versions[tab_string + task] = versions[task]
results_agg = _results_agg
versions = _versions
results_agg[task]["alias"] = tab_string + task
_groups_agg = collections.defaultdict(dict)
for group in groups_agg:
group_results = groups_agg[group]
......@@ -596,10 +589,9 @@ def evaluate(
if group in task_group_alias:
group_alias = task_group_alias[group]
_groups_agg[tab_string + group_alias] = group_results
groups_agg[group]["alias"] = tab_string + group_alias
else:
_groups_agg[tab_string + group] = group_results
groups_agg = _groups_agg
groups_agg[group]["alias"] = tab_string + group
results_dict = {
"results": dict(results_agg.items()),
......
group: mmlu
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
......
group: mmlu_flan_cot_fewshot
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation
fewshot_split: dev
......
group: mmlu_flan_cot_zeroshot
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
validation_split: validation
fewshot_split: dev
......
group: mmlu_flan_n_shot_generative
group: mmlu_flan_n_shot_loglikelihood
task:
- mmlu_flan_n_shot_generative_stem
- mmlu_flan_n_shot_generative_other
- mmlu_flan_n_shot_generative_social_sciences
- mmlu_flan_n_shot_generative_humanities
- mmlu_flan_n_shot_loglikelihood_stem
- mmlu_flan_n_shot_loglikelihood_other
- mmlu_flan_n_shot_loglikelihood_social_sciences
- mmlu_flan_n_shot_loglikelihood_humanities
group: mmlu_flan_n_shot_loglikelihood
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
test_split: test
fewshot_split: dev
......
"dataset_name": "abstract_algebra"
"description": "The following are multiple choice questions (with answers) about abstract\
\ algebra.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_abstract_algebra"
"task": "mmlu_flan_n_shot_loglikelihood_abstract_algebra"
"dataset_name": "anatomy"
"description": "The following are multiple choice questions (with answers) about anatomy.\n\
\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_anatomy"
"task": "mmlu_flan_n_shot_loglikelihood_anatomy"
"dataset_name": "astronomy"
"description": "The following are multiple choice questions (with answers) about astronomy.\n\
\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_astronomy"
"task": "mmlu_flan_n_shot_loglikelihood_astronomy"
"dataset_name": "business_ethics"
"description": "The following are multiple choice questions (with answers) about business\
\ ethics.\n\n"
"group": "mmlu_flan_n_shot_generative_other"
"group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_business_ethics"
"task": "mmlu_flan_n_shot_loglikelihood_business_ethics"
"dataset_name": "clinical_knowledge"
"description": "The following are multiple choice questions (with answers) about clinical\
\ knowledge.\n\n"
"group": "mmlu_flan_n_shot_generative_other"
"group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_clinical_knowledge"
"task": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge"
"dataset_name": "college_biology"
"description": "The following are multiple choice questions (with answers) about college\
\ biology.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_biology"
"task": "mmlu_flan_n_shot_loglikelihood_college_biology"
"dataset_name": "college_chemistry"
"description": "The following are multiple choice questions (with answers) about college\
\ chemistry.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_chemistry"
"task": "mmlu_flan_n_shot_loglikelihood_college_chemistry"
"dataset_name": "college_computer_science"
"description": "The following are multiple choice questions (with answers) about college\
\ computer science.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_computer_science"
"task": "mmlu_flan_n_shot_loglikelihood_college_computer_science"
"dataset_name": "college_mathematics"
"description": "The following are multiple choice questions (with answers) about college\
\ mathematics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_mathematics"
"task": "mmlu_flan_n_shot_loglikelihood_college_mathematics"
"dataset_name": "college_medicine"
"description": "The following are multiple choice questions (with answers) about college\
\ medicine.\n\n"
"group": "mmlu_flan_n_shot_generative_other"
"group": "mmlu_flan_n_shot_loglikelihood_other"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_medicine"
"task": "mmlu_flan_n_shot_loglikelihood_college_medicine"
"dataset_name": "college_physics"
"description": "The following are multiple choice questions (with answers) about college\
\ physics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_college_physics"
"task": "mmlu_flan_n_shot_loglikelihood_college_physics"
"dataset_name": "computer_security"
"description": "The following are multiple choice questions (with answers) about computer\
\ security.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_computer_security"
"task": "mmlu_flan_n_shot_loglikelihood_computer_security"
"dataset_name": "conceptual_physics"
"description": "The following are multiple choice questions (with answers) about conceptual\
\ physics.\n\n"
"group": "mmlu_flan_n_shot_generative_stem"
"group": "mmlu_flan_n_shot_loglikelihood_stem"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_conceptual_physics"
"task": "mmlu_flan_n_shot_loglikelihood_conceptual_physics"
"dataset_name": "econometrics"
"description": "The following are multiple choice questions (with answers) about econometrics.\n\
\n"
"group": "mmlu_flan_n_shot_generative_social_sciences"
"group": "mmlu_flan_n_shot_loglikelihood_social_sciences"
"include": "_mmlu_flan_loglikelihood_template_yaml"
"task": "mmlu_flan_n_shot_generative_econometrics"
"task": "mmlu_flan_n_shot_loglikelihood_econometrics"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment