From b1ba4e71e82ccafabb67145f7fb2f2797544c44f Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 16 Oct 2023 03:18:48 +0000 Subject: [PATCH 01/50] added alt worlds prompts --- .../full_continuation/style_01/_template_yaml | 15 +++++++++++++++ .../style_01/style_01_abstract_algebra.yaml | 4 ++++ .../style_01/style_01_anatomy.yaml | 4 ++++ .../style_01/style_01_astronomy.yaml | 4 ++++ .../style_01/style_01_business_ethics.yaml | 4 ++++ .../style_01/style_01_clinical_knowledge.yaml | 4 ++++ .../style_01/style_01_college_biology.yaml | 4 ++++ .../style_01/style_01_college_chemistry.yaml | 4 ++++ .../style_01_college_computer_science.yaml | 4 ++++ .../style_01/style_01_college_mathematics.yaml | 4 ++++ .../style_01/style_01_college_medicine.yaml | 4 ++++ .../style_01/style_01_college_physics.yaml | 4 ++++ .../style_01/style_01_computer_security.yaml | 4 ++++ .../style_01/style_01_conceptual_physics.yaml | 4 ++++ .../style_01/style_01_econometrics.yaml | 4 ++++ .../style_01/style_01_electrical_engineering.yaml | 4 ++++ .../style_01/style_01_elementary_mathematics.yaml | 4 ++++ .../style_01/style_01_formal_logic.yaml | 4 ++++ .../style_01/style_01_global_facts.yaml | 4 ++++ .../style_01/style_01_high_school_biology.yaml | 4 ++++ .../style_01/style_01_high_school_chemistry.yaml | 4 ++++ .../style_01_high_school_computer_science.yaml | 4 ++++ .../style_01_high_school_european_history.yaml | 4 ++++ .../style_01/style_01_high_school_geography.yaml | 4 ++++ ...le_01_high_school_government_and_politics.yaml | 4 ++++ .../style_01_high_school_macroeconomics.yaml | 4 ++++ .../style_01_high_school_mathematics.yaml | 4 ++++ .../style_01_high_school_microeconomics.yaml | 4 ++++ .../style_01/style_01_high_school_physics.yaml | 4 ++++ .../style_01/style_01_high_school_psychology.yaml | 4 ++++ .../style_01/style_01_high_school_statistics.yaml | 4 ++++ .../style_01/style_01_high_school_us_history.yaml | 4 ++++ .../style_01_high_school_world_history.yaml | 4 ++++ 
.../style_01/style_01_human_aging.yaml | 4 ++++ .../style_01/style_01_human_sexuality.yaml | 4 ++++ .../style_01/style_01_international_law.yaml | 4 ++++ .../style_01/style_01_jurisprudence.yaml | 4 ++++ .../style_01/style_01_logical_fallacies.yaml | 4 ++++ .../style_01/style_01_machine_learning.yaml | 4 ++++ .../style_01/style_01_management.yaml | 4 ++++ .../style_01/style_01_marketing.yaml | 4 ++++ .../style_01/style_01_medical_genetics.yaml | 4 ++++ .../style_01/style_01_miscellaneous.yaml | 4 ++++ .../style_01/style_01_moral_disputes.yaml | 4 ++++ .../style_01/style_01_moral_scenarios.yaml | 4 ++++ .../style_01/style_01_nutrition.yaml | 4 ++++ .../style_01/style_01_philosophy.yaml | 4 ++++ .../style_01/style_01_prehistory.yaml | 4 ++++ .../style_01_professional_accounting.yaml | 4 ++++ .../style_01/style_01_professional_law.yaml | 4 ++++ .../style_01/style_01_professional_medicine.yaml | 4 ++++ .../style_01_professional_psychology.yaml | 4 ++++ .../style_01/style_01_public_relations.yaml | 4 ++++ .../style_01/style_01_security_studies.yaml | 4 ++++ .../style_01/style_01_sociology.yaml | 4 ++++ .../style_01/style_01_us_foreign_policy.yaml | 4 ++++ .../style_01/style_01_virology.yaml | 4 ++++ .../style_01/style_01_world_religions.yaml | 4 ++++ .../full_continuation/style_02/_template_yaml | 15 +++++++++++++++ .../style_02/style_02_abstract_algebra.yaml | 4 ++++ .../style_02/style_02_anatomy.yaml | 4 ++++ .../style_02/style_02_astronomy.yaml | 4 ++++ .../style_02/style_02_business_ethics.yaml | 4 ++++ .../style_02/style_02_clinical_knowledge.yaml | 4 ++++ .../style_02/style_02_college_biology.yaml | 4 ++++ .../style_02/style_02_college_chemistry.yaml | 4 ++++ .../style_02_college_computer_science.yaml | 4 ++++ .../style_02/style_02_college_mathematics.yaml | 4 ++++ .../style_02/style_02_college_medicine.yaml | 4 ++++ .../style_02/style_02_college_physics.yaml | 4 ++++ .../style_02/style_02_computer_security.yaml | 4 ++++ 
.../style_02/style_02_conceptual_physics.yaml | 4 ++++ .../style_02/style_02_econometrics.yaml | 4 ++++ .../style_02/style_02_electrical_engineering.yaml | 4 ++++ .../style_02/style_02_elementary_mathematics.yaml | 4 ++++ .../style_02/style_02_formal_logic.yaml | 4 ++++ .../style_02/style_02_global_facts.yaml | 4 ++++ .../style_02/style_02_high_school_biology.yaml | 4 ++++ .../style_02/style_02_high_school_chemistry.yaml | 4 ++++ .../style_02_high_school_computer_science.yaml | 4 ++++ .../style_02_high_school_european_history.yaml | 4 ++++ .../style_02/style_02_high_school_geography.yaml | 4 ++++ ...le_02_high_school_government_and_politics.yaml | 4 ++++ .../style_02_high_school_macroeconomics.yaml | 4 ++++ .../style_02_high_school_mathematics.yaml | 4 ++++ .../style_02_high_school_microeconomics.yaml | 4 ++++ .../style_02/style_02_high_school_physics.yaml | 4 ++++ .../style_02/style_02_high_school_psychology.yaml | 4 ++++ .../style_02/style_02_high_school_statistics.yaml | 4 ++++ .../style_02/style_02_high_school_us_history.yaml | 4 ++++ .../style_02_high_school_world_history.yaml | 4 ++++ .../style_02/style_02_human_aging.yaml | 4 ++++ .../style_02/style_02_human_sexuality.yaml | 4 ++++ .../style_02/style_02_international_law.yaml | 4 ++++ .../style_02/style_02_jurisprudence.yaml | 4 ++++ .../style_02/style_02_logical_fallacies.yaml | 4 ++++ .../style_02/style_02_machine_learning.yaml | 4 ++++ .../style_02/style_02_management.yaml | 4 ++++ .../style_02/style_02_marketing.yaml | 4 ++++ .../style_02/style_02_medical_genetics.yaml | 4 ++++ .../style_02/style_02_miscellaneous.yaml | 4 ++++ .../style_02/style_02_moral_disputes.yaml | 4 ++++ .../style_02/style_02_moral_scenarios.yaml | 4 ++++ .../style_02/style_02_nutrition.yaml | 4 ++++ .../style_02/style_02_philosophy.yaml | 4 ++++ .../style_02/style_02_prehistory.yaml | 4 ++++ .../style_02_professional_accounting.yaml | 4 ++++ .../style_02/style_02_professional_law.yaml | 4 ++++ 
.../style_02/style_02_professional_medicine.yaml | 4 ++++ .../style_02_professional_psychology.yaml | 4 ++++ .../style_02/style_02_public_relations.yaml | 4 ++++ .../style_02/style_02_security_studies.yaml | 4 ++++ .../style_02/style_02_sociology.yaml | 4 ++++ .../style_02/style_02_us_foreign_policy.yaml | 4 ++++ .../style_02/style_02_virology.yaml | 4 ++++ .../style_02/style_02_world_religions.yaml | 4 ++++ .../full_continuation/style_03/_template_yaml | 15 +++++++++++++++ .../style_03/style_03_abstract_algebra.yaml | 4 ++++ .../style_03/style_03_anatomy.yaml | 4 ++++ .../style_03/style_03_astronomy.yaml | 4 ++++ .../style_03/style_03_business_ethics.yaml | 4 ++++ .../style_03/style_03_clinical_knowledge.yaml | 4 ++++ .../style_03/style_03_college_biology.yaml | 4 ++++ .../style_03/style_03_college_chemistry.yaml | 4 ++++ .../style_03_college_computer_science.yaml | 4 ++++ .../style_03/style_03_college_mathematics.yaml | 4 ++++ .../style_03/style_03_college_medicine.yaml | 4 ++++ .../style_03/style_03_college_physics.yaml | 4 ++++ .../style_03/style_03_computer_security.yaml | 4 ++++ .../style_03/style_03_conceptual_physics.yaml | 4 ++++ .../style_03/style_03_econometrics.yaml | 4 ++++ .../style_03/style_03_electrical_engineering.yaml | 4 ++++ .../style_03/style_03_elementary_mathematics.yaml | 4 ++++ .../style_03/style_03_formal_logic.yaml | 4 ++++ .../style_03/style_03_global_facts.yaml | 4 ++++ .../style_03/style_03_high_school_biology.yaml | 4 ++++ .../style_03/style_03_high_school_chemistry.yaml | 4 ++++ .../style_03_high_school_computer_science.yaml | 4 ++++ .../style_03_high_school_european_history.yaml | 4 ++++ .../style_03/style_03_high_school_geography.yaml | 4 ++++ ...le_03_high_school_government_and_politics.yaml | 4 ++++ .../style_03_high_school_macroeconomics.yaml | 4 ++++ .../style_03_high_school_mathematics.yaml | 4 ++++ .../style_03_high_school_microeconomics.yaml | 4 ++++ .../style_03/style_03_high_school_physics.yaml | 4 ++++ 
.../style_03/style_03_high_school_psychology.yaml | 4 ++++ .../style_03/style_03_high_school_statistics.yaml | 4 ++++ .../style_03/style_03_high_school_us_history.yaml | 4 ++++ .../style_03_high_school_world_history.yaml | 4 ++++ .../style_03/style_03_human_aging.yaml | 4 ++++ .../style_03/style_03_human_sexuality.yaml | 4 ++++ .../style_03/style_03_international_law.yaml | 4 ++++ .../style_03/style_03_jurisprudence.yaml | 4 ++++ .../style_03/style_03_logical_fallacies.yaml | 4 ++++ .../style_03/style_03_machine_learning.yaml | 4 ++++ .../style_03/style_03_management.yaml | 4 ++++ .../style_03/style_03_marketing.yaml | 4 ++++ .../style_03/style_03_medical_genetics.yaml | 4 ++++ .../style_03/style_03_miscellaneous.yaml | 4 ++++ .../style_03/style_03_moral_disputes.yaml | 4 ++++ .../style_03/style_03_moral_scenarios.yaml | 4 ++++ .../style_03/style_03_nutrition.yaml | 4 ++++ .../style_03/style_03_philosophy.yaml | 4 ++++ .../style_03/style_03_prehistory.yaml | 4 ++++ .../style_03_professional_accounting.yaml | 4 ++++ .../style_03/style_03_professional_law.yaml | 4 ++++ .../style_03/style_03_professional_medicine.yaml | 4 ++++ .../style_03_professional_psychology.yaml | 4 ++++ .../style_03/style_03_public_relations.yaml | 4 ++++ .../style_03/style_03_security_studies.yaml | 4 ++++ .../style_03/style_03_sociology.yaml | 4 ++++ .../style_03/style_03_us_foreign_policy.yaml | 4 ++++ .../style_03/style_03_virology.yaml | 4 ++++ .../style_03/style_03_world_religions.yaml | 4 ++++ .../full_continuation/style_04/_template_yaml | 15 +++++++++++++++ .../style_04/style_04_abstract_algebra.yaml | 4 ++++ .../style_04/style_04_anatomy.yaml | 4 ++++ .../style_04/style_04_astronomy.yaml | 4 ++++ .../style_04/style_04_business_ethics.yaml | 4 ++++ .../style_04/style_04_clinical_knowledge.yaml | 4 ++++ .../style_04/style_04_college_biology.yaml | 4 ++++ .../style_04/style_04_college_chemistry.yaml | 4 ++++ .../style_04_college_computer_science.yaml | 4 ++++ 
.../style_04/style_04_college_mathematics.yaml | 4 ++++ .../style_04/style_04_college_medicine.yaml | 4 ++++ .../style_04/style_04_college_physics.yaml | 4 ++++ .../style_04/style_04_computer_security.yaml | 4 ++++ .../style_04/style_04_conceptual_physics.yaml | 4 ++++ .../style_04/style_04_econometrics.yaml | 4 ++++ .../style_04/style_04_electrical_engineering.yaml | 4 ++++ .../style_04/style_04_elementary_mathematics.yaml | 4 ++++ .../style_04/style_04_formal_logic.yaml | 4 ++++ .../style_04/style_04_global_facts.yaml | 4 ++++ .../style_04/style_04_high_school_biology.yaml | 4 ++++ .../style_04/style_04_high_school_chemistry.yaml | 4 ++++ .../style_04_high_school_computer_science.yaml | 4 ++++ .../style_04_high_school_european_history.yaml | 4 ++++ .../style_04/style_04_high_school_geography.yaml | 4 ++++ ...le_04_high_school_government_and_politics.yaml | 4 ++++ .../style_04_high_school_macroeconomics.yaml | 4 ++++ .../style_04_high_school_mathematics.yaml | 4 ++++ .../style_04_high_school_microeconomics.yaml | 4 ++++ .../style_04/style_04_high_school_physics.yaml | 4 ++++ .../style_04/style_04_high_school_psychology.yaml | 4 ++++ .../style_04/style_04_high_school_statistics.yaml | 4 ++++ .../style_04/style_04_high_school_us_history.yaml | 4 ++++ .../style_04_high_school_world_history.yaml | 4 ++++ .../style_04/style_04_human_aging.yaml | 4 ++++ .../style_04/style_04_human_sexuality.yaml | 4 ++++ .../style_04/style_04_international_law.yaml | 4 ++++ .../style_04/style_04_jurisprudence.yaml | 4 ++++ .../style_04/style_04_logical_fallacies.yaml | 4 ++++ .../style_04/style_04_machine_learning.yaml | 4 ++++ .../style_04/style_04_management.yaml | 4 ++++ .../style_04/style_04_marketing.yaml | 4 ++++ .../style_04/style_04_medical_genetics.yaml | 4 ++++ .../style_04/style_04_miscellaneous.yaml | 4 ++++ .../style_04/style_04_moral_disputes.yaml | 4 ++++ .../style_04/style_04_moral_scenarios.yaml | 4 ++++ .../style_04/style_04_nutrition.yaml | 4 ++++ 
.../style_04/style_04_philosophy.yaml | 4 ++++ .../style_04/style_04_prehistory.yaml | 4 ++++ .../style_04_professional_accounting.yaml | 4 ++++ .../style_04/style_04_professional_law.yaml | 4 ++++ .../style_04/style_04_professional_medicine.yaml | 4 ++++ .../style_04_professional_psychology.yaml | 4 ++++ .../style_04/style_04_public_relations.yaml | 4 ++++ .../style_04/style_04_security_studies.yaml | 4 ++++ .../style_04/style_04_sociology.yaml | 4 ++++ .../style_04/style_04_us_foreign_policy.yaml | 4 ++++ .../style_04/style_04_virology.yaml | 4 ++++ .../style_04/style_04_world_religions.yaml | 4 ++++ .../full_continuation/style_05/_template_yaml | 15 +++++++++++++++ .../style_05/style_05_abstract_algebra.yaml | 4 ++++ .../style_05/style_05_anatomy.yaml | 4 ++++ .../style_05/style_05_astronomy.yaml | 4 ++++ .../style_05/style_05_business_ethics.yaml | 4 ++++ .../style_05/style_05_clinical_knowledge.yaml | 4 ++++ .../style_05/style_05_college_biology.yaml | 4 ++++ .../style_05/style_05_college_chemistry.yaml | 4 ++++ .../style_05_college_computer_science.yaml | 4 ++++ .../style_05/style_05_college_mathematics.yaml | 4 ++++ .../style_05/style_05_college_medicine.yaml | 4 ++++ .../style_05/style_05_college_physics.yaml | 4 ++++ .../style_05/style_05_computer_security.yaml | 4 ++++ .../style_05/style_05_conceptual_physics.yaml | 4 ++++ .../style_05/style_05_econometrics.yaml | 4 ++++ .../style_05/style_05_electrical_engineering.yaml | 4 ++++ .../style_05/style_05_elementary_mathematics.yaml | 4 ++++ .../style_05/style_05_formal_logic.yaml | 4 ++++ .../style_05/style_05_global_facts.yaml | 4 ++++ .../style_05/style_05_high_school_biology.yaml | 4 ++++ .../style_05/style_05_high_school_chemistry.yaml | 4 ++++ .../style_05_high_school_computer_science.yaml | 4 ++++ .../style_05_high_school_european_history.yaml | 4 ++++ .../style_05/style_05_high_school_geography.yaml | 4 ++++ ...le_05_high_school_government_and_politics.yaml | 4 ++++ 
.../style_05_high_school_macroeconomics.yaml | 4 ++++ .../style_05_high_school_mathematics.yaml | 4 ++++ .../style_05_high_school_microeconomics.yaml | 4 ++++ .../style_05/style_05_high_school_physics.yaml | 4 ++++ .../style_05/style_05_high_school_psychology.yaml | 4 ++++ .../style_05/style_05_high_school_statistics.yaml | 4 ++++ .../style_05/style_05_high_school_us_history.yaml | 4 ++++ .../style_05_high_school_world_history.yaml | 4 ++++ .../style_05/style_05_human_aging.yaml | 4 ++++ .../style_05/style_05_human_sexuality.yaml | 4 ++++ .../style_05/style_05_international_law.yaml | 4 ++++ .../style_05/style_05_jurisprudence.yaml | 4 ++++ .../style_05/style_05_logical_fallacies.yaml | 4 ++++ .../style_05/style_05_machine_learning.yaml | 4 ++++ .../style_05/style_05_management.yaml | 4 ++++ .../style_05/style_05_marketing.yaml | 4 ++++ .../style_05/style_05_medical_genetics.yaml | 4 ++++ .../style_05/style_05_miscellaneous.yaml | 4 ++++ .../style_05/style_05_moral_disputes.yaml | 4 ++++ .../style_05/style_05_moral_scenarios.yaml | 4 ++++ .../style_05/style_05_nutrition.yaml | 4 ++++ .../style_05/style_05_philosophy.yaml | 4 ++++ .../style_05/style_05_prehistory.yaml | 4 ++++ .../style_05_professional_accounting.yaml | 4 ++++ .../style_05/style_05_professional_law.yaml | 4 ++++ .../style_05/style_05_professional_medicine.yaml | 4 ++++ .../style_05_professional_psychology.yaml | 4 ++++ .../style_05/style_05_public_relations.yaml | 4 ++++ .../style_05/style_05_security_studies.yaml | 4 ++++ .../style_05/style_05_sociology.yaml | 4 ++++ .../style_05/style_05_us_foreign_policy.yaml | 4 ++++ .../style_05/style_05_virology.yaml | 4 ++++ .../style_05/style_05_world_religions.yaml | 4 ++++ .../letters_only/style_01/_template_yaml | 15 +++++++++++++++ .../style_01/style_01_abstract_algebra.yaml | 4 ++++ .../letters_only/style_01/style_01_anatomy.yaml | 4 ++++ .../letters_only/style_01/style_01_astronomy.yaml | 4 ++++ .../style_01/style_01_business_ethics.yaml | 4 ++++ 
.../style_01/style_01_clinical_knowledge.yaml | 4 ++++ .../style_01/style_01_college_biology.yaml | 4 ++++ .../style_01/style_01_college_chemistry.yaml | 4 ++++ .../style_01_college_computer_science.yaml | 4 ++++ .../style_01/style_01_college_mathematics.yaml | 4 ++++ .../style_01/style_01_college_medicine.yaml | 4 ++++ .../style_01/style_01_college_physics.yaml | 4 ++++ .../style_01/style_01_computer_security.yaml | 4 ++++ .../style_01/style_01_conceptual_physics.yaml | 4 ++++ .../style_01/style_01_econometrics.yaml | 4 ++++ .../style_01/style_01_electrical_engineering.yaml | 4 ++++ .../style_01/style_01_elementary_mathematics.yaml | 4 ++++ .../style_01/style_01_formal_logic.yaml | 4 ++++ .../style_01/style_01_global_facts.yaml | 4 ++++ .../style_01/style_01_high_school_biology.yaml | 4 ++++ .../style_01/style_01_high_school_chemistry.yaml | 4 ++++ .../style_01_high_school_computer_science.yaml | 4 ++++ .../style_01_high_school_european_history.yaml | 4 ++++ .../style_01/style_01_high_school_geography.yaml | 4 ++++ ...le_01_high_school_government_and_politics.yaml | 4 ++++ .../style_01_high_school_macroeconomics.yaml | 4 ++++ .../style_01_high_school_mathematics.yaml | 4 ++++ .../style_01_high_school_microeconomics.yaml | 4 ++++ .../style_01/style_01_high_school_physics.yaml | 4 ++++ .../style_01/style_01_high_school_psychology.yaml | 4 ++++ .../style_01/style_01_high_school_statistics.yaml | 4 ++++ .../style_01/style_01_high_school_us_history.yaml | 4 ++++ .../style_01_high_school_world_history.yaml | 4 ++++ .../style_01/style_01_human_aging.yaml | 4 ++++ .../style_01/style_01_human_sexuality.yaml | 4 ++++ .../style_01/style_01_international_law.yaml | 4 ++++ .../style_01/style_01_jurisprudence.yaml | 4 ++++ .../style_01/style_01_logical_fallacies.yaml | 4 ++++ .../style_01/style_01_machine_learning.yaml | 4 ++++ .../style_01/style_01_management.yaml | 4 ++++ .../letters_only/style_01/style_01_marketing.yaml | 4 ++++ .../style_01/style_01_medical_genetics.yaml | 
4 ++++ .../style_01/style_01_miscellaneous.yaml | 4 ++++ .../style_01/style_01_moral_disputes.yaml | 4 ++++ .../style_01/style_01_moral_scenarios.yaml | 4 ++++ .../letters_only/style_01/style_01_nutrition.yaml | 4 ++++ .../style_01/style_01_philosophy.yaml | 4 ++++ .../style_01/style_01_prehistory.yaml | 4 ++++ .../style_01_professional_accounting.yaml | 4 ++++ .../style_01/style_01_professional_law.yaml | 4 ++++ .../style_01/style_01_professional_medicine.yaml | 4 ++++ .../style_01_professional_psychology.yaml | 4 ++++ .../style_01/style_01_public_relations.yaml | 4 ++++ .../style_01/style_01_security_studies.yaml | 4 ++++ .../letters_only/style_01/style_01_sociology.yaml | 4 ++++ .../style_01/style_01_us_foreign_policy.yaml | 4 ++++ .../letters_only/style_01/style_01_virology.yaml | 4 ++++ .../style_01/style_01_world_religions.yaml | 4 ++++ .../letters_only/style_02/_template_yaml | 15 +++++++++++++++ .../style_02/style_02_abstract_algebra.yaml | 4 ++++ .../letters_only/style_02/style_02_anatomy.yaml | 4 ++++ .../letters_only/style_02/style_02_astronomy.yaml | 4 ++++ .../style_02/style_02_business_ethics.yaml | 4 ++++ .../style_02/style_02_clinical_knowledge.yaml | 4 ++++ .../style_02/style_02_college_biology.yaml | 4 ++++ .../style_02/style_02_college_chemistry.yaml | 4 ++++ .../style_02_college_computer_science.yaml | 4 ++++ .../style_02/style_02_college_mathematics.yaml | 4 ++++ .../style_02/style_02_college_medicine.yaml | 4 ++++ .../style_02/style_02_college_physics.yaml | 4 ++++ .../style_02/style_02_computer_security.yaml | 4 ++++ .../style_02/style_02_conceptual_physics.yaml | 4 ++++ .../style_02/style_02_econometrics.yaml | 4 ++++ .../style_02/style_02_electrical_engineering.yaml | 4 ++++ .../style_02/style_02_elementary_mathematics.yaml | 4 ++++ .../style_02/style_02_formal_logic.yaml | 4 ++++ .../style_02/style_02_global_facts.yaml | 4 ++++ .../style_02/style_02_high_school_biology.yaml | 4 ++++ .../style_02/style_02_high_school_chemistry.yaml | 4 ++++ 
.../style_02_high_school_computer_science.yaml | 4 ++++ .../style_02_high_school_european_history.yaml | 4 ++++ .../style_02/style_02_high_school_geography.yaml | 4 ++++ ...le_02_high_school_government_and_politics.yaml | 4 ++++ .../style_02_high_school_macroeconomics.yaml | 4 ++++ .../style_02_high_school_mathematics.yaml | 4 ++++ .../style_02_high_school_microeconomics.yaml | 4 ++++ .../style_02/style_02_high_school_physics.yaml | 4 ++++ .../style_02/style_02_high_school_psychology.yaml | 4 ++++ .../style_02/style_02_high_school_statistics.yaml | 4 ++++ .../style_02/style_02_high_school_us_history.yaml | 4 ++++ .../style_02_high_school_world_history.yaml | 4 ++++ .../style_02/style_02_human_aging.yaml | 4 ++++ .../style_02/style_02_human_sexuality.yaml | 4 ++++ .../style_02/style_02_international_law.yaml | 4 ++++ .../style_02/style_02_jurisprudence.yaml | 4 ++++ .../style_02/style_02_logical_fallacies.yaml | 4 ++++ .../style_02/style_02_machine_learning.yaml | 4 ++++ .../style_02/style_02_management.yaml | 4 ++++ .../letters_only/style_02/style_02_marketing.yaml | 4 ++++ .../style_02/style_02_medical_genetics.yaml | 4 ++++ .../style_02/style_02_miscellaneous.yaml | 4 ++++ .../style_02/style_02_moral_disputes.yaml | 4 ++++ .../style_02/style_02_moral_scenarios.yaml | 4 ++++ .../letters_only/style_02/style_02_nutrition.yaml | 4 ++++ .../style_02/style_02_philosophy.yaml | 4 ++++ .../style_02/style_02_prehistory.yaml | 4 ++++ .../style_02_professional_accounting.yaml | 4 ++++ .../style_02/style_02_professional_law.yaml | 4 ++++ .../style_02/style_02_professional_medicine.yaml | 4 ++++ .../style_02_professional_psychology.yaml | 4 ++++ .../style_02/style_02_public_relations.yaml | 4 ++++ .../style_02/style_02_security_studies.yaml | 4 ++++ .../letters_only/style_02/style_02_sociology.yaml | 4 ++++ .../style_02/style_02_us_foreign_policy.yaml | 4 ++++ .../letters_only/style_02/style_02_virology.yaml | 4 ++++ .../style_02/style_02_world_religions.yaml | 4 ++++ 
.../letters_only/style_03/_template_yaml | 15 +++++++++++++++ .../style_03/style_03_abstract_algebra.yaml | 4 ++++ .../letters_only/style_03/style_03_anatomy.yaml | 4 ++++ .../letters_only/style_03/style_03_astronomy.yaml | 4 ++++ .../style_03/style_03_business_ethics.yaml | 4 ++++ .../style_03/style_03_clinical_knowledge.yaml | 4 ++++ .../style_03/style_03_college_biology.yaml | 4 ++++ .../style_03/style_03_college_chemistry.yaml | 4 ++++ .../style_03_college_computer_science.yaml | 4 ++++ .../style_03/style_03_college_mathematics.yaml | 4 ++++ .../style_03/style_03_college_medicine.yaml | 4 ++++ .../style_03/style_03_college_physics.yaml | 4 ++++ .../style_03/style_03_computer_security.yaml | 4 ++++ .../style_03/style_03_conceptual_physics.yaml | 4 ++++ .../style_03/style_03_econometrics.yaml | 4 ++++ .../style_03/style_03_electrical_engineering.yaml | 4 ++++ .../style_03/style_03_elementary_mathematics.yaml | 4 ++++ .../style_03/style_03_formal_logic.yaml | 4 ++++ .../style_03/style_03_global_facts.yaml | 4 ++++ .../style_03/style_03_high_school_biology.yaml | 4 ++++ .../style_03/style_03_high_school_chemistry.yaml | 4 ++++ .../style_03_high_school_computer_science.yaml | 4 ++++ .../style_03_high_school_european_history.yaml | 4 ++++ .../style_03/style_03_high_school_geography.yaml | 4 ++++ ...le_03_high_school_government_and_politics.yaml | 4 ++++ .../style_03_high_school_macroeconomics.yaml | 4 ++++ .../style_03_high_school_mathematics.yaml | 4 ++++ .../style_03_high_school_microeconomics.yaml | 4 ++++ .../style_03/style_03_high_school_physics.yaml | 4 ++++ .../style_03/style_03_high_school_psychology.yaml | 4 ++++ .../style_03/style_03_high_school_statistics.yaml | 4 ++++ .../style_03/style_03_high_school_us_history.yaml | 4 ++++ .../style_03_high_school_world_history.yaml | 4 ++++ .../style_03/style_03_human_aging.yaml | 4 ++++ .../style_03/style_03_human_sexuality.yaml | 4 ++++ .../style_03/style_03_international_law.yaml | 4 ++++ 
.../style_03/style_03_jurisprudence.yaml | 4 ++++ .../style_03/style_03_logical_fallacies.yaml | 4 ++++ .../style_03/style_03_machine_learning.yaml | 4 ++++ .../style_03/style_03_management.yaml | 4 ++++ .../letters_only/style_03/style_03_marketing.yaml | 4 ++++ .../style_03/style_03_medical_genetics.yaml | 4 ++++ .../style_03/style_03_miscellaneous.yaml | 4 ++++ .../style_03/style_03_moral_disputes.yaml | 4 ++++ .../style_03/style_03_moral_scenarios.yaml | 4 ++++ .../letters_only/style_03/style_03_nutrition.yaml | 4 ++++ .../style_03/style_03_philosophy.yaml | 4 ++++ .../style_03/style_03_prehistory.yaml | 4 ++++ .../style_03_professional_accounting.yaml | 4 ++++ .../style_03/style_03_professional_law.yaml | 4 ++++ .../style_03/style_03_professional_medicine.yaml | 4 ++++ .../style_03_professional_psychology.yaml | 4 ++++ .../style_03/style_03_public_relations.yaml | 4 ++++ .../style_03/style_03_security_studies.yaml | 4 ++++ .../letters_only/style_03/style_03_sociology.yaml | 4 ++++ .../style_03/style_03_us_foreign_policy.yaml | 4 ++++ .../letters_only/style_03/style_03_virology.yaml | 4 ++++ .../style_03/style_03_world_religions.yaml | 4 ++++ .../letters_only/style_04/_template_yaml | 15 +++++++++++++++ .../style_04/style_04_abstract_algebra.yaml | 4 ++++ .../letters_only/style_04/style_04_anatomy.yaml | 4 ++++ .../letters_only/style_04/style_04_astronomy.yaml | 4 ++++ .../style_04/style_04_business_ethics.yaml | 4 ++++ .../style_04/style_04_clinical_knowledge.yaml | 4 ++++ .../style_04/style_04_college_biology.yaml | 4 ++++ .../style_04/style_04_college_chemistry.yaml | 4 ++++ .../style_04_college_computer_science.yaml | 4 ++++ .../style_04/style_04_college_mathematics.yaml | 4 ++++ .../style_04/style_04_college_medicine.yaml | 4 ++++ .../style_04/style_04_college_physics.yaml | 4 ++++ .../style_04/style_04_computer_security.yaml | 4 ++++ .../style_04/style_04_conceptual_physics.yaml | 4 ++++ .../style_04/style_04_econometrics.yaml | 4 ++++ 
.../style_04/style_04_electrical_engineering.yaml | 4 ++++ .../style_04/style_04_elementary_mathematics.yaml | 4 ++++ .../style_04/style_04_formal_logic.yaml | 4 ++++ .../style_04/style_04_global_facts.yaml | 4 ++++ .../style_04/style_04_high_school_biology.yaml | 4 ++++ .../style_04/style_04_high_school_chemistry.yaml | 4 ++++ .../style_04_high_school_computer_science.yaml | 4 ++++ .../style_04_high_school_european_history.yaml | 4 ++++ .../style_04/style_04_high_school_geography.yaml | 4 ++++ ...le_04_high_school_government_and_politics.yaml | 4 ++++ .../style_04_high_school_macroeconomics.yaml | 4 ++++ .../style_04_high_school_mathematics.yaml | 4 ++++ .../style_04_high_school_microeconomics.yaml | 4 ++++ .../style_04/style_04_high_school_physics.yaml | 4 ++++ .../style_04/style_04_high_school_psychology.yaml | 4 ++++ .../style_04/style_04_high_school_statistics.yaml | 4 ++++ .../style_04/style_04_high_school_us_history.yaml | 4 ++++ .../style_04_high_school_world_history.yaml | 4 ++++ .../style_04/style_04_human_aging.yaml | 4 ++++ .../style_04/style_04_human_sexuality.yaml | 4 ++++ .../style_04/style_04_international_law.yaml | 4 ++++ .../style_04/style_04_jurisprudence.yaml | 4 ++++ .../style_04/style_04_logical_fallacies.yaml | 4 ++++ .../style_04/style_04_machine_learning.yaml | 4 ++++ .../style_04/style_04_management.yaml | 4 ++++ .../letters_only/style_04/style_04_marketing.yaml | 4 ++++ .../style_04/style_04_medical_genetics.yaml | 4 ++++ .../style_04/style_04_miscellaneous.yaml | 4 ++++ .../style_04/style_04_moral_disputes.yaml | 4 ++++ .../style_04/style_04_moral_scenarios.yaml | 4 ++++ .../letters_only/style_04/style_04_nutrition.yaml | 4 ++++ .../style_04/style_04_philosophy.yaml | 4 ++++ .../style_04/style_04_prehistory.yaml | 4 ++++ .../style_04_professional_accounting.yaml | 4 ++++ .../style_04/style_04_professional_law.yaml | 4 ++++ .../style_04/style_04_professional_medicine.yaml | 4 ++++ .../style_04_professional_psychology.yaml | 4 ++++ 
.../style_04/style_04_public_relations.yaml | 4 ++++ .../style_04/style_04_security_studies.yaml | 4 ++++ .../letters_only/style_04/style_04_sociology.yaml | 4 ++++ .../style_04/style_04_us_foreign_policy.yaml | 4 ++++ .../letters_only/style_04/style_04_virology.yaml | 4 ++++ .../style_04/style_04_world_religions.yaml | 4 ++++ .../letters_only/style_05/_template_yaml | 15 +++++++++++++++ .../style_05/style_05_abstract_algebra.yaml | 4 ++++ .../letters_only/style_05/style_05_anatomy.yaml | 4 ++++ .../letters_only/style_05/style_05_astronomy.yaml | 4 ++++ .../style_05/style_05_business_ethics.yaml | 4 ++++ .../style_05/style_05_clinical_knowledge.yaml | 4 ++++ .../style_05/style_05_college_biology.yaml | 4 ++++ .../style_05/style_05_college_chemistry.yaml | 4 ++++ .../style_05_college_computer_science.yaml | 4 ++++ .../style_05/style_05_college_mathematics.yaml | 4 ++++ .../style_05/style_05_college_medicine.yaml | 4 ++++ .../style_05/style_05_college_physics.yaml | 4 ++++ .../style_05/style_05_computer_security.yaml | 4 ++++ .../style_05/style_05_conceptual_physics.yaml | 4 ++++ .../style_05/style_05_econometrics.yaml | 4 ++++ .../style_05/style_05_electrical_engineering.yaml | 4 ++++ .../style_05/style_05_elementary_mathematics.yaml | 4 ++++ .../style_05/style_05_formal_logic.yaml | 4 ++++ .../style_05/style_05_global_facts.yaml | 4 ++++ .../style_05/style_05_high_school_biology.yaml | 4 ++++ .../style_05/style_05_high_school_chemistry.yaml | 4 ++++ .../style_05_high_school_computer_science.yaml | 4 ++++ .../style_05_high_school_european_history.yaml | 4 ++++ .../style_05/style_05_high_school_geography.yaml | 4 ++++ ...le_05_high_school_government_and_politics.yaml | 4 ++++ .../style_05_high_school_macroeconomics.yaml | 4 ++++ .../style_05_high_school_mathematics.yaml | 4 ++++ .../style_05_high_school_microeconomics.yaml | 4 ++++ .../style_05/style_05_high_school_physics.yaml | 4 ++++ .../style_05/style_05_high_school_psychology.yaml | 4 ++++ 
.../style_05/style_05_high_school_statistics.yaml | 4 ++++ .../style_05/style_05_high_school_us_history.yaml | 4 ++++ .../style_05_high_school_world_history.yaml | 4 ++++ .../style_05/style_05_human_aging.yaml | 4 ++++ .../style_05/style_05_human_sexuality.yaml | 4 ++++ .../style_05/style_05_international_law.yaml | 4 ++++ .../style_05/style_05_jurisprudence.yaml | 4 ++++ .../style_05/style_05_logical_fallacies.yaml | 4 ++++ .../style_05/style_05_machine_learning.yaml | 4 ++++ .../style_05/style_05_management.yaml | 4 ++++ .../letters_only/style_05/style_05_marketing.yaml | 4 ++++ .../style_05/style_05_medical_genetics.yaml | 4 ++++ .../style_05/style_05_miscellaneous.yaml | 4 ++++ .../style_05/style_05_moral_disputes.yaml | 4 ++++ .../style_05/style_05_moral_scenarios.yaml | 4 ++++ .../letters_only/style_05/style_05_nutrition.yaml | 4 ++++ .../style_05/style_05_philosophy.yaml | 4 ++++ .../style_05/style_05_prehistory.yaml | 4 ++++ .../style_05_professional_accounting.yaml | 4 ++++ .../style_05/style_05_professional_law.yaml | 4 ++++ .../style_05/style_05_professional_medicine.yaml | 4 ++++ .../style_05_professional_psychology.yaml | 4 ++++ .../style_05/style_05_public_relations.yaml | 4 ++++ .../style_05/style_05_security_studies.yaml | 4 ++++ .../letters_only/style_05/style_05_sociology.yaml | 4 ++++ .../style_05/style_05_us_foreign_policy.yaml | 4 ++++ .../letters_only/style_05/style_05_virology.yaml | 4 ++++ .../style_05/style_05_world_religions.yaml | 4 ++++ .../mmlu_alternative_worlds_fc.yaml | 7 +++++++ .../mmlu_alternative_worlds_lo.yaml | 7 +++++++ 582 files changed, 2444 insertions(+) create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml new file mode 100644 index 00000000..1a5cb4cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_01_fc +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}}\n(B) {{choices[1]}}\n(C) {{choices[2]}}\n(D) {{choices[3]}}\nA: " +doc_to_choice: "{{['(A) '+choices[0], '(B) 
'+choices[1], '(C) '+choices[2], '(D) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml new file mode 100644 index 00000000..6a6a8643 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml new file mode 100644 index 00000000..f0a6d17b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml new file mode 100644 index 00000000..cf182ebb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_astronomy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml new file mode 100644 index 00000000..54f8af54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml new file mode 100644 index 00000000..8f49c1db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml new file mode 100644 index 00000000..bf6d1c4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml 
new file mode 100644 index 00000000..5e34ce01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml new file mode 100644 index 00000000..cdf093a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml new file mode 100644 index 00000000..23c08a30 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml new file mode 100644 index 00000000..478aa690 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml new file mode 100644 index 00000000..ae450418 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml new file mode 100644 index 00000000..6242db7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml new file mode 100644 index 00000000..1d0e623a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are 
multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml new file mode 100644 index 00000000..92c0cf93 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml new file mode 100644 index 00000000..0fcce024 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml new file mode 100644 index 00000000..00e3b422 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_elementary_mathematics" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml new file mode 100644 index 00000000..6ee8b4bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml new file mode 100644 index 00000000..aa601e97 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml new file mode 100644 index 00000000..705b4eba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml new file 
mode 100644 index 00000000..755abc3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml new file mode 100644 index 00000000..a93522e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml new file mode 100644 index 00000000..85779283 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml new file mode 100644 index 
00000000..13b47bc0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml new file mode 100644 index 00000000..c4d0baa6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b2b1b2e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml new file mode 
100644 index 00000000..95257104 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml new file mode 100644 index 00000000..13b10862 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml new file mode 100644 index 00000000..3adfaf4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml new file mode 100644 index 00000000..6e8f2841 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml new file mode 100644 index 00000000..fa1d1033 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml new file mode 100644 index 00000000..71d33cc3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml new file mode 100644 index 00000000..0c1373f0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml new file mode 100644 index 00000000..2049c970 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml new file mode 100644 index 00000000..cfe7e9a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml new file mode 100644 index 00000000..e24395bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple 
choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml new file mode 100644 index 00000000..4aeac5f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml new file mode 100644 index 00000000..d0299c7e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml new file mode 100644 index 00000000..2dd00d57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml new file mode 100644 index 00000000..85db5f7e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml new file mode 100644 index 00000000..e149454a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml new file mode 100644 index 00000000..030c4245 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml new file mode 100644 index 00000000..5ac6c654 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml new file mode 100644 index 00000000..89dd4970 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml new file mode 100644 index 00000000..643a3dac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml new file mode 100644 index 00000000..e7c52ba9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_01_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml new file mode 100644 index 00000000..0b669ded --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml new file mode 100644 index 00000000..baed2437 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml new file mode 100644 index 00000000..86cfa62c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml new file mode 100644 index 00000000..415383d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml new file mode 100644 index 00000000..370881b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml new file mode 100644 index 00000000..83b0fb51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml new file mode 100644 index 
00000000..f812fb1d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml new file mode 100644 index 00000000..aed4580f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml new file mode 100644 index 00000000..ec71095a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml new file mode 100644 index 00000000..894433eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice 
questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml new file mode 100644 index 00000000..abadbf46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml new file mode 100644 index 00000000..fec1a23d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml new file mode 100644 index 00000000..396cf875 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_02_fc +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nA: " +doc_to_choice: "{{['A. '+choices[0], 'B. '+choices[1], 'C. '+choices[2], 'D. 
'+choices[3]]}}" +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml new file mode 100644 index 00000000..993f4ed4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml new file mode 100644 index 00000000..154c8bde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml new file mode 100644 index 00000000..80b5f529 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml new file mode 100644 index 00000000..d401a727 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml new file mode 100644 index 00000000..88062d80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml new file mode 100644 index 00000000..2935671f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml new file mode 100644 index 00000000..96ca75d7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml new file mode 100644 index 00000000..1b77265b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml new file mode 100644 index 00000000..c2264362 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml new file mode 100644 index 00000000..febcf8cc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml @@ -0,0 +1,4 @@ 
+"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml new file mode 100644 index 00000000..4685a383 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml new file mode 100644 index 00000000..b70bd16b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml new file mode 100644 index 00000000..f8fd546c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_02_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml new file mode 100644 index 00000000..ce61dd15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml new file mode 100644 index 00000000..c2b49225 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml new file mode 100644 index 00000000..1d3f8ec6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml new file mode 100644 index 00000000..1a626cd3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml new file mode 100644 index 00000000..d08193c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml new file mode 100644 index 00000000..505a9c3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml new file mode 100644 index 00000000..540e5c7c --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml new file mode 100644 index 00000000..0e628287 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml new file mode 100644 index 00000000..b202e393 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml new file mode 100644 index 00000000..1ccfbc31 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2a6fdcf1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml new file mode 100644 index 00000000..c6b0d4f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml new file mode 100644 index 00000000..17e7a054 --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml new file mode 100644 index 00000000..53a36c3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml new file mode 100644 index 00000000..d65678c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml new file mode 100644 index 00000000..bb4940d9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml new file mode 100644 index 00000000..ea8f90fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml new file mode 100644 index 00000000..0a00e301 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml new file mode 100644 index 00000000..d43c94bf --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml new file mode 100644 index 00000000..82510fdb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml new file mode 100644 index 00000000..3351ff16 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml new file mode 100644 index 00000000..c2e2d8ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple 
choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml new file mode 100644 index 00000000..a281191c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml new file mode 100644 index 00000000..248e2e38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml new file mode 100644 index 00000000..990f6d0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml new file mode 100644 index 00000000..72f13b4a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml new file mode 100644 index 00000000..93d298d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml new file mode 100644 index 00000000..45737778 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml new file mode 100644 index 00000000..5f570691 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml new file mode 100644 index 00000000..f8388171 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml new file mode 100644 index 00000000..b4dbfbcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml new file mode 100644 index 00000000..f0a1af26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_02_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml new file mode 100644 index 00000000..4455fc2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml new file mode 100644 index 00000000..4c6c0b9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml new file mode 100644 index 00000000..4ae50925 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml new file mode 100644 index 00000000..ddf67b8b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml new file mode 100644 index 00000000..b8a1e1c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml new file mode 100644 index 00000000..343c3d35 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml new file mode 100644 index 
00000000..15cbcc5c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml new file mode 100644 index 00000000..2315f848 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml new file mode 100644 index 00000000..97fa859e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml new file mode 100644 index 00000000..dd5510be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice 
questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml new file mode 100644 index 00000000..0ebdc549 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml new file mode 100644 index 00000000..ab076ec5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml new file mode 100644 index 00000000..074aeef0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_03_fc +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA) {{choices[0]}}\nB) {{choices[1]}}\nC) {{choices[2]}}\nD) {{choices[3]}}\nA: " +doc_to_choice: "{{['A) '+choices[0], 'B) '+choices[1], 'C) '+choices[2], 'D) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] 
+doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml new file mode 100644 index 00000000..00454841 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml new file mode 100644 index 00000000..8fc0c2ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml new file mode 100644 index 00000000..dc7d0971 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml new file mode 100644 index 00000000..e245bd91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml new file mode 100644 index 00000000..4da57590 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml new file mode 100644 index 00000000..33d821b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml new file mode 100644 index 00000000..246d5c0d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml new file mode 100644 index 00000000..393b6636 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml new file mode 100644 index 00000000..0636d0b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml new file mode 100644 index 00000000..8c189982 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml @@ -0,0 +1,4 @@ 
+"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml new file mode 100644 index 00000000..9b0f07cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml new file mode 100644 index 00000000..40f6a6c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml new file mode 100644 index 00000000..0ffc95d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_03_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml new file mode 100644 index 00000000..f538431f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml new file mode 100644 index 00000000..4c21bede --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml new file mode 100644 index 00000000..759dba03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml new file mode 100644 index 00000000..2754379a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml new file mode 100644 index 00000000..997c431e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml new file mode 100644 index 00000000..626cfb14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml new file mode 100644 index 00000000..566bd7b5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml new file mode 100644 index 00000000..816448e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml new file mode 100644 index 00000000..1bda17bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml new file mode 100644 index 00000000..cca80255 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml new file mode 100644 index 00000000..08dffd01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4a259293 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml new file mode 100644 index 00000000..c09513e8 --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml new file mode 100644 index 00000000..653ad185 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml new file mode 100644 index 00000000..3232299f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml new file mode 100644 index 00000000..7ae5bfa0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml new file mode 100644 index 00000000..1788a7cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml new file mode 100644 index 00000000..1cc70773 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml new file mode 100644 index 00000000..874dac59 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml new file mode 100644 index 00000000..6df374ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml new file mode 100644 index 00000000..1aac6d66 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml new file mode 100644 index 00000000..1493eef3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple 
choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml new file mode 100644 index 00000000..cc43e5f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml new file mode 100644 index 00000000..69d01e84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml new file mode 100644 index 00000000..241765c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml new file mode 100644 index 00000000..52b3d13c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml new file mode 100644 index 00000000..7ae05fde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml new file mode 100644 index 00000000..526f5a3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml new file mode 100644 index 00000000..52d46c09 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml new file mode 100644 index 00000000..2eeba617 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml new file mode 100644 index 00000000..4ab76556 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml new file mode 100644 index 00000000..26318da3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_03_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml new file mode 100644 index 00000000..d011f57f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml new file mode 100644 index 00000000..5ff8a000 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml new file mode 100644 index 00000000..9b19f66b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml new file mode 100644 index 00000000..bb32235e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml new file mode 100644 index 00000000..f64d2914 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml new file mode 100644 index 00000000..c730d29b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml new file mode 100644 index 
00000000..fe784f69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml new file mode 100644 index 00000000..5f7fc937 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml new file mode 100644 index 00000000..799ab788 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml new file mode 100644 index 00000000..81eb5ca6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice 
questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml new file mode 100644 index 00000000..dd64c766 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml new file mode 100644 index 00000000..86f83eeb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml new file mode 100644 index 00000000..250705e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_04_fc +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA, {{choices[0]}}\nB, {{choices[1]}}\nC, {{choices[2]}}\nD, {{choices[3]}}\nA: " +doc_to_choice: "{{['A, '+choices[0], 'B, '+choices[1], 'C, '+choices[2], 'D, '+choices[3]]}}" # "{{choices}}" # ["(A)", "(B)", "(C)", "(D)"] 
+doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml new file mode 100644 index 00000000..9e98fe5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml new file mode 100644 index 00000000..7f6d4fc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml new file mode 100644 index 00000000..a679f24a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml new file mode 100644 index 00000000..643b18f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml new file mode 100644 index 00000000..e0c53ae4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml new file mode 100644 index 00000000..210863b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml new file mode 100644 index 00000000..948977c0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml new file mode 100644 index 00000000..e6a20393 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml new file mode 100644 index 00000000..1fa68f36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml new file mode 100644 index 00000000..0b557e91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml @@ -0,0 +1,4 @@ 
+"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml new file mode 100644 index 00000000..9ea539e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml new file mode 100644 index 00000000..e8713f39 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml new file mode 100644 index 00000000..b348ae36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_04_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml new file mode 100644 index 00000000..af5812be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml new file mode 100644 index 00000000..66a5e9d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml new file mode 100644 index 00000000..ac85b2e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml new file mode 100644 index 00000000..24de56a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml new file mode 100644 index 00000000..1bf3a402 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml new file mode 100644 index 00000000..90a1142d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml new file mode 100644 index 00000000..8a55d49b --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml new file mode 100644 index 00000000..bedf826d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml new file mode 100644 index 00000000..922b30b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml new file mode 100644 index 00000000..c64045c5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml new file mode 100644 index 00000000..51e1f917 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0e06b5df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml new file mode 100644 index 00000000..66c9ecb2 --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml new file mode 100644 index 00000000..641933b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml new file mode 100644 index 00000000..30424296 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml new file mode 100644 index 00000000..39b8eb36 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml new file mode 100644 index 00000000..07c41317 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml new file mode 100644 index 00000000..6c6a10bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml new file mode 100644 index 00000000..a540f03d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml new file mode 100644 index 00000000..540daaae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml new file mode 100644 index 00000000..dc5407d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml new file mode 100644 index 00000000..fbc74f36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple 
choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml new file mode 100644 index 00000000..522a5d45 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml new file mode 100644 index 00000000..f7ce9484 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml new file mode 100644 index 00000000..38a6d060 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml new file mode 100644 index 00000000..90925876 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml new file mode 100644 index 00000000..85f6acd4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml new file mode 100644 index 00000000..6cad25b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml new file mode 100644 index 00000000..fad34d46 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml new file mode 100644 index 00000000..fdb07f53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml new file mode 100644 index 00000000..21299b26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml new file mode 100644 index 00000000..551af716 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_04_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml new file mode 100644 index 00000000..2b88a867 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml new file mode 100644 index 00000000..0a416e4c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml new file mode 100644 index 00000000..d9e1cfcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml new file mode 100644 index 00000000..9606f5a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml new file mode 100644 index 00000000..16a9320e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml new file mode 100644 index 00000000..229b5200 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml new file mode 100644 index 
00000000..161fac6f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml new file mode 100644 index 00000000..2489444d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml new file mode 100644 index 00000000..ca7c5b9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml new file mode 100644 index 00000000..538aaf90 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice 
questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml new file mode 100644 index 00000000..3f7bad4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml new file mode 100644 index 00000000..66cc040e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml new file mode 100644 index 00000000..5c437352 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_05_fc +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Answer the following question with fruit that represents the options: {{question.strip()}}\n(Apple) {{choices[0]}}\n(Banana) {{choices[1]}}\n(Cantaloupe) {{choices[2]}}\n(Durian) {{choices[3]}}\nAnswer: " +doc_to_choice: "{{['(Apple) '+choices[0], 
'(Banana) '+choices[1], '(Cantaloupe) '+choices[2], '(Durian) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml new file mode 100644 index 00000000..db455b3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml new file mode 100644 index 00000000..86bbcc4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml new file mode 100644 index 00000000..73201b0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_astronomy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml new file mode 100644 index 00000000..3cbda755 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml new file mode 100644 index 00000000..28dbe325 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml new file mode 100644 index 00000000..56db146e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml 
new file mode 100644 index 00000000..aab517c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml new file mode 100644 index 00000000..1e45d4d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml new file mode 100644 index 00000000..3560688f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml new file mode 100644 index 00000000..49bce1bb --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml new file mode 100644 index 00000000..8d3b96e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml new file mode 100644 index 00000000..1d032b55 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml new file mode 100644 index 00000000..b40a3c3f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are 
multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml new file mode 100644 index 00000000..19ee3f1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml new file mode 100644 index 00000000..71c27923 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml new file mode 100644 index 00000000..fbbde0da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_elementary_mathematics" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml new file mode 100644 index 00000000..98560803 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml new file mode 100644 index 00000000..5a99dcbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml new file mode 100644 index 00000000..19d0b64b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml new file 
mode 100644 index 00000000..fe593e1a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml new file mode 100644 index 00000000..4c0f2c9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml new file mode 100644 index 00000000..367ee3cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml new file mode 100644 index 
00000000..1003c063 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml new file mode 100644 index 00000000..fb504698 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml new file mode 100644 index 00000000..7f727304 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml new file mode 
100644 index 00000000..d7514f5e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml new file mode 100644 index 00000000..578e0936 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml new file mode 100644 index 00000000..290bf34e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml new file mode 100644 index 00000000..6a0f84b7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml new file mode 100644 index 00000000..5f53106e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml new file mode 100644 index 00000000..a021c530 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml new file mode 100644 index 00000000..b035ff06 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml new file mode 100644 index 00000000..dd1a98fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml new file mode 100644 index 00000000..907fb5e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml new file mode 100644 index 00000000..1b7762ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple 
choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml new file mode 100644 index 00000000..867f5208 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml new file mode 100644 index 00000000..b783548e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml new file mode 100644 index 00000000..5fac78f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml new file mode 100644 index 00000000..44d12096 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml new file mode 100644 index 00000000..d1012c53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml new file mode 100644 index 00000000..97383250 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml new file mode 100644 index 00000000..ddc49653 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml new file mode 100644 index 00000000..234a386c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml new file mode 100644 index 00000000..9a112318 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml new file mode 100644 index 00000000..6198a8e7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_05_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml new file mode 100644 index 00000000..e137e42f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml new file mode 100644 index 00000000..fb7e0a83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml new file mode 100644 index 00000000..f2c5bdde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml new file mode 100644 index 00000000..7d3877ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml new file mode 100644 index 00000000..a97eabc3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml new file mode 100644 index 00000000..55a99fc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml new file mode 100644 index 
00000000..64c6762f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml new file mode 100644 index 00000000..8bd9f339 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml new file mode 100644 index 00000000..e9416042 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml new file mode 100644 index 00000000..c0bddee1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice 
questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml new file mode 100644 index 00000000..2db007e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml new file mode 100644 index 00000000..fa939813 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml new file mode 100644 index 00000000..98d15de6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_01_lo +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}}\n(B) {{choices[1]}}\n(C) {{choices[2]}}\n(D) {{choices[3]}}\nA: " +doc_to_choice: ['(A)', '(B)', '(C)', '(D)'] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: 
acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml new file mode 100644 index 00000000..558c5e52 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml new file mode 100644 index 00000000..b01ebedb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml new file mode 100644 index 00000000..f8eb0b16 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml new file mode 100644 index 00000000..07f094ee --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml new file mode 100644 index 00000000..1523a6ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml new file mode 100644 index 00000000..7da6f830 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml new file mode 100644 index 00000000..4cb4c646 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college 
chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml new file mode 100644 index 00000000..a4fc1770 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml new file mode 100644 index 00000000..781a93ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml new file mode 100644 index 00000000..26c72102 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_medicine" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml new file mode 100644 index 00000000..18c4b6c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml new file mode 100644 index 00000000..b93acb38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml new file mode 100644 index 00000000..d817b140 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml new file mode 100644 index 00000000..2a68646b --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml new file mode 100644 index 00000000..31ad2fa6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml new file mode 100644 index 00000000..29ffa115 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml new file mode 100644 index 00000000..370b1018 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple 
choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml new file mode 100644 index 00000000..45f504ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml new file mode 100644 index 00000000..5fd5912d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml new file mode 100644 index 00000000..15d0c893 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml new file mode 100644 index 00000000..f124095e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml new file mode 100644 index 00000000..cbf30b84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml new file mode 100644 index 00000000..f6d7e3df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml new file mode 100644 index 00000000..9816dbcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml new file mode 100644 index 00000000..1b819d09 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml new file mode 100644 index 00000000..0a14903e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml new file mode 100644 index 00000000..5dca1e99 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml new file mode 100644 index 00000000..7b28660c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml new file mode 100644 index 00000000..f8e66745 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml new file mode 100644 index 00000000..c2bd8b8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml new file mode 100644 index 00000000..c7c3f7b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml new file mode 100644 index 00000000..e425eb37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml new file mode 100644 index 
00000000..010f23df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml new file mode 100644 index 00000000..481026d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml new file mode 100644 index 00000000..036b361a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml new file mode 100644 index 00000000..ccc6bf56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about 
jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml new file mode 100644 index 00000000..a0e27f9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml new file mode 100644 index 00000000..309b2089 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml new file mode 100644 index 00000000..4927b36f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml 
new file mode 100644 index 00000000..28df2e3a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml new file mode 100644 index 00000000..abd21d98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml new file mode 100644 index 00000000..770938dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml new file mode 100644 index 00000000..9abe2973 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral 
disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml new file mode 100644 index 00000000..c97b0702 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml new file mode 100644 index 00000000..a3cb37c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml new file mode 100644 index 00000000..362ac1b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml new file mode 100644 index 00000000..be6e7a5e --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml new file mode 100644 index 00000000..75633be7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml new file mode 100644 index 00000000..f09e5a3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml new file mode 100644 index 00000000..b78942f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple 
choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml new file mode 100644 index 00000000..93b289d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml new file mode 100644 index 00000000..8a9cc49c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml new file mode 100644 index 00000000..c40f85dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_security_studies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml new file mode 100644 index 00000000..2abcd7ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml new file mode 100644 index 00000000..87e3bd95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml new file mode 100644 index 00000000..cca3fddd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml new file mode 100644 index 00000000..147ec4a7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml new file mode 100644 index 00000000..565c4661 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_02_lo +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nA: " +doc_to_choice: ['A.', 'B.', 'C.', 'D.'] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml new file mode 100644 index 00000000..7bbc0305 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml new file mode 100644 index 00000000..6ad5fc90 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml 
@@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml new file mode 100644 index 00000000..921bbae3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml new file mode 100644 index 00000000..30c61b85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml new file mode 100644 index 00000000..0d828f8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml new file mode 100644 index 00000000..77208f67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml new file mode 100644 index 00000000..0b98e1c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml new file mode 100644 index 00000000..0c4fdbd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml new 
file mode 100644 index 00000000..6db966a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml new file mode 100644 index 00000000..0c75eb2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml new file mode 100644 index 00000000..e2207cf5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml new file mode 100644 index 00000000..edc01a6b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The 
following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml new file mode 100644 index 00000000..015681ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml new file mode 100644 index 00000000..c315cff4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml new file mode 100644 index 00000000..fa370bbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_electrical_engineering" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml new file mode 100644 index 00000000..f481e971 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml new file mode 100644 index 00000000..b9d6a1ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml new file mode 100644 index 00000000..a8990f95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml new file mode 100644 index 00000000..8e16593f --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml new file mode 100644 index 00000000..506127e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml new file mode 100644 index 00000000..f5fa9c49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml new file mode 100644 index 00000000..5d8a413c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml 
@@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml new file mode 100644 index 00000000..150affea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml new file mode 100644 index 00000000..4722d231 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml new file mode 100644 index 00000000..d14d2db6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml new file mode 100644 index 00000000..e1adf87b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml new file mode 100644 index 00000000..5d994ad4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml new file mode 100644 index 00000000..5699e2be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about 
high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml new file mode 100644 index 00000000..814ae0a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml new file mode 100644 index 00000000..bf2d2d38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml new file mode 100644 index 00000000..8cefc8e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_us_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml new file mode 100644 index 00000000..901b1386 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml new file mode 100644 index 00000000..ef484740 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml new file mode 100644 index 00000000..567a0494 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml new file mode 100644 index 
00000000..e09cead2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml new file mode 100644 index 00000000..7643ef29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml new file mode 100644 index 00000000..6e8d9618 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml new file mode 100644 index 00000000..aa324ad5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with 
answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml new file mode 100644 index 00000000..75781818 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml new file mode 100644 index 00000000..d1a2333a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml new file mode 100644 index 00000000..8c97c867 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml new file mode 
100644 index 00000000..77dd129a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml new file mode 100644 index 00000000..cbf95df9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml new file mode 100644 index 00000000..941410b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml new file mode 100644 index 00000000..b49ca05c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_02_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml new file mode 100644 index 00000000..08c23643 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml new file mode 100644 index 00000000..8106aea7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml new file mode 100644 index 00000000..75ad6a33 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml new file 
mode 100644 index 00000000..3c8e4e8b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml new file mode 100644 index 00000000..37b2b3a0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml new file mode 100644 index 00000000..34af2faa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml new file mode 100644 index 00000000..6d3ca3c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml @@ -0,0 +1,4 @@ 
+"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml new file mode 100644 index 00000000..3ef58bb8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml new file mode 100644 index 00000000..2bd3662d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml new file mode 100644 index 00000000..b268c344 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml new file mode 100644 index 00000000..1a4a27e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml new file mode 100644 index 00000000..f262df0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_02_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml new file mode 100644 index 00000000..f73e43d3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_03_lo +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA) {{choices[0]}}\nB) {{choices[1]}}\nC) {{choices[2]}}\nD) {{choices[3]}}\nA: " +doc_to_choice: ['A)', 'B)', 'C)', 'D)'] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml new file mode 100644 index 00000000..2a4eb20b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml new file mode 100644 index 00000000..81b9e739 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml new file mode 100644 index 00000000..7b8cd957 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml new file mode 100644 index 00000000..07e9dcf0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are 
multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml new file mode 100644 index 00000000..5ff6bb05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml new file mode 100644 index 00000000..69e449da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml new file mode 100644 index 00000000..7821d3eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml new file mode 100644 index 00000000..c4523a31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml new file mode 100644 index 00000000..412ac6ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml new file mode 100644 index 00000000..7ac69116 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml new file mode 100644 index 00000000..756ff002 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml new file mode 100644 index 00000000..c0ce5bb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml new file mode 100644 index 00000000..49a67c57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml new file mode 100644 index 00000000..6dab21fe --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml new file mode 100644 index 00000000..1020b58b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml new file mode 100644 index 00000000..741d91ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml new file mode 100644 index 00000000..b7ddf861 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice 
questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml new file mode 100644 index 00000000..3cf5e788 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml new file mode 100644 index 00000000..b8fa3952 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml new file mode 100644 index 00000000..9c407ec0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml new file mode 100644 index 00000000..eb0e9358 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml new file mode 100644 index 00000000..95df7e0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml new file mode 100644 index 00000000..887fbe07 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e25c7b5c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml new file mode 100644 index 00000000..9765f81d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml new file mode 100644 index 00000000..505a0e83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml new file mode 100644 index 00000000..01ec11cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml new file mode 100644 index 00000000..804a4925 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml new file mode 100644 index 00000000..759293a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml new file mode 100644 index 00000000..f9b4593e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml new file mode 100644 index 00000000..dad11f7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml new file mode 100644 index 00000000..ef5bb47a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml new file mode 100644 index 
00000000..240e0b83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml new file mode 100644 index 00000000..e90be5bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml new file mode 100644 index 00000000..24fc0971 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml new file mode 100644 index 00000000..0936c265 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about 
jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml new file mode 100644 index 00000000..f4e0f297 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml new file mode 100644 index 00000000..271e9eb6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml new file mode 100644 index 00000000..1bc0eb54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml 
new file mode 100644 index 00000000..0255fa98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml new file mode 100644 index 00000000..18fce675 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml new file mode 100644 index 00000000..4d36997b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml new file mode 100644 index 00000000..ffca74f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral 
disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml new file mode 100644 index 00000000..182287dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml new file mode 100644 index 00000000..aca00704 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml new file mode 100644 index 00000000..08b87d0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml new file mode 100644 index 00000000..f526b91e --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml new file mode 100644 index 00000000..0062e96b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml new file mode 100644 index 00000000..9e597455 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml new file mode 100644 index 00000000..820f26ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple 
choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml new file mode 100644 index 00000000..cbceb92e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml new file mode 100644 index 00000000..500746af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml new file mode 100644 index 00000000..62d1409a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_security_studies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml new file mode 100644 index 00000000..b08ed0d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml new file mode 100644 index 00000000..94c05159 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml new file mode 100644 index 00000000..1f9e8964 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml new file mode 100644 index 00000000..92548c68 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_03_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml new file mode 100644 index 00000000..471aacac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_04_lo +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA, {{choices[0]}}\nB, {{choices[1]}}\nC, {{choices[2]}}\nD, {{choices[3]}}\nA: " +doc_to_choice: ['A,', 'B,', 'C,', 'D,'] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml new file mode 100644 index 00000000..8d2f8098 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml new file mode 100644 index 00000000..e26117a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml 
@@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml new file mode 100644 index 00000000..3998c320 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml new file mode 100644 index 00000000..3ffa155e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml new file mode 100644 index 00000000..69ce9fa4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_clinical_knowledge" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml new file mode 100644 index 00000000..ecc2125d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml new file mode 100644 index 00000000..7c060ed8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml new file mode 100644 index 00000000..90ff5939 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml new 
file mode 100644 index 00000000..2bded9b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml new file mode 100644 index 00000000..37e93c56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml new file mode 100644 index 00000000..75722ef1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml new file mode 100644 index 00000000..6698f17f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The 
following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml new file mode 100644 index 00000000..f1810d32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml new file mode 100644 index 00000000..b9f5a6e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml new file mode 100644 index 00000000..2e5b0d36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_electrical_engineering" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml new file mode 100644 index 00000000..c56e5058 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml new file mode 100644 index 00000000..5986c2ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml new file mode 100644 index 00000000..84e3b7f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml new file mode 100644 index 00000000..07c716b0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml new file mode 100644 index 00000000..e71aa530 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml new file mode 100644 index 00000000..40b56a11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml new file mode 100644 index 00000000..dc6021fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml 
@@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml new file mode 100644 index 00000000..1034fe77 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml new file mode 100644 index 00000000..3b8857ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4568abab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml new file mode 100644 index 00000000..704025a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml new file mode 100644 index 00000000..4870affe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml new file mode 100644 index 00000000..95aa699e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about 
high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml new file mode 100644 index 00000000..3ea1114b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml new file mode 100644 index 00000000..4c80a489 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml new file mode 100644 index 00000000..ccd9fbcb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_us_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml new file mode 100644 index 00000000..40f70676 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml new file mode 100644 index 00000000..65510f56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml new file mode 100644 index 00000000..aff1870e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml new file mode 100644 index 
00000000..17333211 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml new file mode 100644 index 00000000..295f77ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml new file mode 100644 index 00000000..76f66318 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml new file mode 100644 index 00000000..1acaf925 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with 
answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml new file mode 100644 index 00000000..478800d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml new file mode 100644 index 00000000..ca1290aa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml new file mode 100644 index 00000000..a20091a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml new file mode 
100644 index 00000000..75486c12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml new file mode 100644 index 00000000..c7b6dc5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml new file mode 100644 index 00000000..6c5214d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml new file mode 100644 index 00000000..82696658 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_04_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml new file mode 100644 index 00000000..b1c5c0ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml new file mode 100644 index 00000000..d9833825 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml new file mode 100644 index 00000000..28a00e14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml new file 
mode 100644 index 00000000..c9a9be10 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml new file mode 100644 index 00000000..6696d250 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml new file mode 100644 index 00000000..f5ea9d3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml new file mode 100644 index 00000000..9eb5b35b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml @@ -0,0 +1,4 @@ 
+"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml new file mode 100644 index 00000000..427c7679 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml new file mode 100644 index 00000000..2d00d5c9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml new file mode 100644 index 00000000..2f6c7243 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml new file mode 100644 index 00000000..ce0cd658 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml new file mode 100644 index 00000000..9828954e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_04_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml new file mode 100644 index 00000000..a95d6de5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_05_lo +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: "Answer the following question with fruit that represents the options: {{question.strip()}}\n(Apple) {{choices[0]}}\n(Banana) {{choices[1]}}\n(Cantaloupe) {{choices[2]}}\n(Durian) {{choices[3]}}\nAnswer: " +doc_to_choice: ['(Apple)', '(Banana)', '(Cantaloupe)', '(Durian)'] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml new file mode 100644 index 00000000..4b032a41 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml new file mode 100644 index 00000000..a634aced --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml new file mode 100644 index 00000000..1701ab8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml new file mode 100644 index 00000000..317c27bf --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml new file mode 100644 index 00000000..7694fa1a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml new file mode 100644 index 00000000..8ef40714 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml new file mode 100644 index 00000000..805f9fa4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college 
chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml new file mode 100644 index 00000000..bb51328c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml new file mode 100644 index 00000000..19d655f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml new file mode 100644 index 00000000..b99d2df0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_medicine" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml new file mode 100644 index 00000000..f5f13fbc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml new file mode 100644 index 00000000..54c44811 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml new file mode 100644 index 00000000..90678e03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml new file mode 100644 index 00000000..471530eb --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml new file mode 100644 index 00000000..0dc9e1aa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml new file mode 100644 index 00000000..f2e08287 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml new file mode 100644 index 00000000..7d965de1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple 
choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml new file mode 100644 index 00000000..4854f37b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml new file mode 100644 index 00000000..eef05492 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml new file mode 100644 index 00000000..fbfaf14e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml new file mode 100644 index 00000000..fd2e7d92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml new file mode 100644 index 00000000..0162142a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml new file mode 100644 index 00000000..4477cce5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml new file mode 100644 index 00000000..de1aab96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml new file mode 100644 index 00000000..e0a9e76f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml new file mode 100644 index 00000000..b5d2a35e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml new file mode 100644 index 00000000..652a999a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml new file mode 100644 index 00000000..bd9223c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml new file mode 100644 index 00000000..11d44aa7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml new file mode 100644 index 00000000..25ec5d75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml new file mode 100644 index 00000000..637beb83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml new file mode 100644 index 00000000..0167efc4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml new file mode 100644 index 
00000000..455c9d1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml new file mode 100644 index 00000000..70e40d7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml new file mode 100644 index 00000000..4b26da35 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml new file mode 100644 index 00000000..efeef8f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about 
jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml new file mode 100644 index 00000000..5495dd43 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml new file mode 100644 index 00000000..ff028543 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml new file mode 100644 index 00000000..d306e0dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml 
new file mode 100644 index 00000000..2843c86a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml new file mode 100644 index 00000000..056600b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml new file mode 100644 index 00000000..e33009b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml new file mode 100644 index 00000000..89565096 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral 
disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml new file mode 100644 index 00000000..02d93244 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml new file mode 100644 index 00000000..056cb1be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml new file mode 100644 index 00000000..83195d6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml new file mode 100644 index 00000000..ed40e6cb --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml new file mode 100644 index 00000000..f9450fe7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml new file mode 100644 index 00000000..d2d2b1e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml new file mode 100644 index 00000000..bc808a89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple 
choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml new file mode 100644 index 00000000..bc5a4d36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml new file mode 100644 index 00000000..62afb7b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml new file mode 100644 index 00000000..bf2bb7c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_security_studies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml new file mode 100644 index 00000000..801db8ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml new file mode 100644 index 00000000..8402d83d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml new file mode 100644 index 00000000..9cf4d92b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml new file mode 100644 index 00000000..6697992e --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_05_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml new file mode 100644 index 00000000..f22c500e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml @@ -0,0 +1,7 @@ +group: alternative_worlds_mmlu_fc +task: + - mmlu_style_01_fc + - mmlu_style_02_fc + - mmlu_style_03_fc + - mmlu_style_04_fc + - mmlu_style_05_fc \ No newline at end of file diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml new file mode 100644 index 00000000..0de2da42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml @@ -0,0 +1,7 @@ +group: alternative_worlds_mmlu_lo +task: + - mmlu_style_01_lo + - mmlu_style_02_lo + - mmlu_style_03_lo + - mmlu_style_04_lo + - mmlu_style_05_lo \ No newline at end of file -- GitLab From 98f9bac986d688bddb345a915ba6dc8b712246fe Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 6 Nov 2023 11:01:16 +0000 Subject: [PATCH 02/50] add brier_score --- lm_eval/api/metrics.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 69e66fdc..adf2ce30 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -7,6 +7,7 @@ import sklearn.metrics import random import evaluate +from Levenshtein import distance from lm_eval.api.registry import register_metric, register_aggregation @@ -106,6 +107,27 @@ def ter(items): return sacrebleu.corpus_ter(preds, refs).score 
+@register_aggregation("brier_score") +def brier_score(items): # This is a passthrough function + gold = list(zip(*items))[0] + gold_one_hot = np.eye(max(gold)+1)[gold] + predictions = list(zip(*items))[1] + print("predictions", prediction) + print("gold_one_hot", gold_one_hot) + import sys; sys.exit() + return np.mean(np.sum((predictions - gold_one_hot)**2, axis=1)) + + +@register_metric( + metric="brier_score", + higher_is_better=False, + output_type=["multiple_choice"], + aggregation="brier_score", +) +def brier_score_fn(items): # This is a passthrough function + return items + + @register_metric( metric="acc", higher_is_better=True, @@ -139,6 +161,18 @@ def acc_mutual_info_fn(items): # This is a passthrough function exact_match = evaluate.load("exact_match") +# @register_metric( +# metric="token_edit_distance", +# higher_is_better=False, +# output_type=["generate_until"], +# aggregation="mean", +# ) +# def ted_fn(items): # This is a passthrough function + +# references, predictions = items +# return distance(references, predictions) + + @register_metric( metric="exact_match", higher_is_better=True, -- GitLab From a76754ff4b09ad199aa7d5500cb3e3d416205c70 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 6 Nov 2023 11:01:30 +0000 Subject: [PATCH 03/50] process brier_score --- lm_eval/api/task.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 883c643d..613e52ae 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -972,7 +972,10 @@ class ConfigurableTask(Task): def process_results(self, doc, results): if callable(self.config.process_results): - return self.config.process_results(doc, results) + try: + return self.config.process_results(self, doc, results) + except: + return self.config.process_results(doc, results) result_dict = {} use_metric = list(self._metric_fn_list.keys()) @@ -1060,12 +1063,15 @@ class ConfigurableTask(Task): # TODO: this gets score of 0 on arc_challenge 
for pythia-70m. need to test that this works properly exact_match = int(is_greedy[gold]) if gold != -100 else 0 + prob_norm = [float(i)/sum(lls) for i in lls] + result_dict = { **({"acc": acc} if "acc" in use_metric else {}), **({"f1": (gold, pred)} if "f1" in use_metric else {}), **({"mcc": (gold, pred)} if "mcc" in use_metric else {}), **({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}), **({"exact_match": exact_match} if "exact_match" in use_metric else {}), + **({"brier_score": (gold, prob_norm)} if "brier_score" in use_metric else {}), } if "acc_mutual_info" in use_metric: -- GitLab From d49636a3025e9d3f2d020019c07276758fabb479 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 6 Nov 2023 11:12:17 +0000 Subject: [PATCH 04/50] brier score is working for N-sized class --- lm_eval/api/metrics.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index adf2ce30..0f36435c 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -109,12 +109,10 @@ def ter(items): @register_aggregation("brier_score") def brier_score(items): # This is a passthrough function - gold = list(zip(*items))[0] - gold_one_hot = np.eye(max(gold)+1)[gold] + gold, predictions = list(zip(*items)) + gold = list(gold) + gold_one_hot = np.eye(np.max(gold)+1)[gold] predictions = list(zip(*items))[1] - print("predictions", prediction) - print("gold_one_hot", gold_one_hot) - import sys; sys.exit() return np.mean(np.sum((predictions - gold_one_hot)**2, axis=1)) -- GitLab From 5cc65a798a6348779bfe0854e7cb1e449a79228f Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Mon, 6 Nov 2023 16:25:30 +0000 Subject: [PATCH 05/50] fxied brier score --- lm_eval/api/metrics.py | 12 ------------ lm_eval/api/task.py | 4 +++- lm_eval/utils.py | 7 +++++++ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 0f36435c..16e2b189 100644 --- 
a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -159,18 +159,6 @@ def acc_mutual_info_fn(items): # This is a passthrough function exact_match = evaluate.load("exact_match") -# @register_metric( -# metric="token_edit_distance", -# higher_is_better=False, -# output_type=["generate_until"], -# aggregation="mean", -# ) -# def ted_fn(items): # This is a passthrough function - -# references, predictions = items -# return distance(references, predictions) - - @register_metric( metric="exact_match", higher_is_better=True, diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 613e52ae..dbcd50dc 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1063,8 +1063,10 @@ class ConfigurableTask(Task): # TODO: this gets score of 0 on arc_challenge for pythia-70m. need to test that this works properly exact_match = int(is_greedy[gold]) if gold != -100 else 0 - prob_norm = [float(i)/sum(lls) for i in lls] + prob_norm = utils.softmax(lls) + # TODO use keyword arguments to the metric? 
+ # gold, pred, norm stuff, the original lls, result_dict = { **({"acc": acc} if "acc" in use_metric else {}), **({"f1": (gold, pred)} if "f1" in use_metric else {}), diff --git a/lm_eval/utils.py b/lm_eval/utils.py index d246470a..514e49f4 100644 --- a/lm_eval/utils.py +++ b/lm_eval/utils.py @@ -15,6 +15,7 @@ from typing import Iterator, List, Literal, Union import gc import torch import transformers +import numpy as np from jinja2 import BaseLoader, Environment, StrictUndefined from itertools import islice @@ -127,6 +128,12 @@ def pattern_match(patterns, source_list): return sorted(list(task_names)) +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() + + def general_detokenize(string): string = string.replace(" n't", "n't") string = string.replace(" )", ")") -- GitLab From 2b7d8c2da5d6cb2acb6bf117c420e9402c24ade7 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 7 Nov 2023 02:13:22 +0000 Subject: [PATCH 06/50] add TED to BigBench and Brier score to MMLU --- lm_eval/tasks/bigbench/aux_metric.py | 10 ++++++++++ lm_eval/tasks/bigbench/generate_until_template_yaml | 5 ++++- lm_eval/tasks/mmlu/default/_default_template_yaml | 3 +++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 lm_eval/tasks/bigbench/aux_metric.py diff --git a/lm_eval/tasks/bigbench/aux_metric.py b/lm_eval/tasks/bigbench/aux_metric.py new file mode 100644 index 00000000..4a92c41e --- /dev/null +++ b/lm_eval/tasks/bigbench/aux_metric.py @@ -0,0 +1,10 @@ +from textdistance import levenshtein +from transformers import AutoTokenizer + +# Change this tokenizer to fit with the model you are using. 
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b") + +def token_edit_distance(references, predictions, **kwargs): + ref_tokens = tokenizer.encode(references[0]) + pred_tokens = tokenizer.encode(predictions[0]) + return levenshtein.distance(ref_tokens, pred_tokens) diff --git a/lm_eval/tasks/bigbench/generate_until_template_yaml b/lm_eval/tasks/bigbench/generate_until_template_yaml index ebce0377..99f44525 100644 --- a/lm_eval/tasks/bigbench/generate_until_template_yaml +++ b/lm_eval/tasks/bigbench/generate_until_template_yaml @@ -1,5 +1,5 @@ group: bigbench -dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed +dataset_path: hails/bigbench # will switch to `hails/bigbench` when all tasks are pushed output_type: generate_until dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods @@ -14,3 +14,6 @@ metric_list: aggregation: mean higher_is_better: true ignore_punctuation: true + - metric: !function aux_metric.token_edit_distance # pip install textdistance + aggregation: mean + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/mmlu/default/_default_template_yaml b/lm_eval/tasks/mmlu/default/_default_template_yaml index af4bf12c..e24cb508 100644 --- a/lm_eval/tasks/mmlu/default/_default_template_yaml +++ b/lm_eval/tasks/mmlu/default/_default_template_yaml @@ -15,3 +15,6 @@ metric_list: - metric: acc_norm aggregation: mean higher_is_better: true + - metric: brier_score + aggregation: mean + higher_is_better: false \ No newline at end of file -- GitLab From 0a39d0556e0724314fe758cf251142db10a6d3e0 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 7 Nov 2023 02:23:46 +0000 Subject: [PATCH 07/50] format --- docs/new_task_guide.md | 2 +- lm_eval/api/metrics.py | 4 ++-- lm_eval/api/task.py | 8 ++++++-- lm_eval/tasks/bigbench/aux_metric.py | 6 +++++- lm_eval/tasks/bigbench/generate_until_template_yaml | 
2 +- lm_eval/tasks/mmlu/default/_default_template_yaml | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/new_task_guide.md b/docs/new_task_guide.md index 6b30bfc9..86966be5 100644 --- a/docs/new_task_guide.md +++ b/docs/new_task_guide.md @@ -50,7 +50,7 @@ dataset_kwargs: null # any extra keyword arguments that should be passed to the ``` dataset_path: json dataset_name: null -dataset_kwargs: +dataset_kwargs: data_files: /path/to/my/json ``` ------------------------------- diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 16e2b189..f1f544c9 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -111,9 +111,9 @@ def ter(items): def brier_score(items): # This is a passthrough function gold, predictions = list(zip(*items)) gold = list(gold) - gold_one_hot = np.eye(np.max(gold)+1)[gold] + gold_one_hot = np.eye(np.max(gold) + 1)[gold] predictions = list(zip(*items))[1] - return np.mean(np.sum((predictions - gold_one_hot)**2, axis=1)) + return np.mean(np.sum((predictions - gold_one_hot) ** 2, axis=1)) @register_metric( diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index dbcd50dc..b699ca12 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1066,14 +1066,18 @@ class ConfigurableTask(Task): prob_norm = utils.softmax(lls) # TODO use keyword arguments to the metric? 
- # gold, pred, norm stuff, the original lls, + # gold, pred, norm stuff, the original lls, result_dict = { **({"acc": acc} if "acc" in use_metric else {}), **({"f1": (gold, pred)} if "f1" in use_metric else {}), **({"mcc": (gold, pred)} if "mcc" in use_metric else {}), **({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}), **({"exact_match": exact_match} if "exact_match" in use_metric else {}), - **({"brier_score": (gold, prob_norm)} if "brier_score" in use_metric else {}), + **( + {"brier_score": (gold, prob_norm)} + if "brier_score" in use_metric + else {} + ), } if "acc_mutual_info" in use_metric: diff --git a/lm_eval/tasks/bigbench/aux_metric.py b/lm_eval/tasks/bigbench/aux_metric.py index 4a92c41e..bd87fbec 100644 --- a/lm_eval/tasks/bigbench/aux_metric.py +++ b/lm_eval/tasks/bigbench/aux_metric.py @@ -2,9 +2,13 @@ from textdistance import levenshtein from transformers import AutoTokenizer # Change this tokenizer to fit with the model you are using. -tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b") +tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-2.8b", max_new_tokens=128) + def token_edit_distance(references, predictions, **kwargs): + print(references) + print(predictions) + print("###") ref_tokens = tokenizer.encode(references[0]) pred_tokens = tokenizer.encode(predictions[0]) return levenshtein.distance(ref_tokens, pred_tokens) diff --git a/lm_eval/tasks/bigbench/generate_until_template_yaml b/lm_eval/tasks/bigbench/generate_until_template_yaml index 99f44525..9e6b5265 100644 --- a/lm_eval/tasks/bigbench/generate_until_template_yaml +++ b/lm_eval/tasks/bigbench/generate_until_template_yaml @@ -16,4 +16,4 @@ metric_list: ignore_punctuation: true - metric: !function aux_metric.token_edit_distance # pip install textdistance aggregation: mean - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/mmlu/default/_default_template_yaml 
b/lm_eval/tasks/mmlu/default/_default_template_yaml index e24cb508..4e017ff6 100644 --- a/lm_eval/tasks/mmlu/default/_default_template_yaml +++ b/lm_eval/tasks/mmlu/default/_default_template_yaml @@ -17,4 +17,4 @@ metric_list: higher_is_better: true - metric: brier_score aggregation: mean - higher_is_better: false \ No newline at end of file + higher_is_better: false -- GitLab From 59b5471c9bb832d056bd1d2668798a822a66f322 Mon Sep 17 00:00:00 2001 From: Lintang Sutawika Date: Wed, 8 Nov 2023 11:02:06 +0700 Subject: [PATCH 08/50] Update metrics.py --- lm_eval/api/metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index f1f544c9..be4d6f0b 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -7,7 +7,6 @@ import sklearn.metrics import random import evaluate -from Levenshtein import distance from lm_eval.api.registry import register_metric, register_aggregation -- GitLab From 66b3c3a2e50d45ca6c1ecc7f35f4c702ca7b3040 Mon Sep 17 00:00:00 2001 From: Lintang Sutawika Date: Wed, 8 Nov 2023 11:02:41 +0700 Subject: [PATCH 09/50] Update task.py --- lm_eval/api/task.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index eac914ce..c35808d7 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -953,10 +953,7 @@ class ConfigurableTask(Task): def process_results(self, doc, results): if callable(self.config.process_results): - try: - return self.config.process_results(self, doc, results) - except: - return self.config.process_results(doc, results) + return self.config.process_results(doc, results) result_dict = {} use_metric = list(self._metric_fn_list.keys()) -- GitLab From 1522009c6d9b05bd0aae2e83d6ce7b7418affc5e Mon Sep 17 00:00:00 2001 From: Lintang Sutawika Date: Wed, 8 Nov 2023 11:03:16 +0700 Subject: [PATCH 10/50] Update generate_until_template_yaml --- lm_eval/tasks/bigbench/generate_until_template_yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/bigbench/generate_until_template_yaml b/lm_eval/tasks/bigbench/generate_until_template_yaml index 9e6b5265..2e4db21f 100644 --- a/lm_eval/tasks/bigbench/generate_until_template_yaml +++ b/lm_eval/tasks/bigbench/generate_until_template_yaml @@ -1,5 +1,5 @@ group: bigbench -dataset_path: hails/bigbench # will switch to `hails/bigbench` when all tasks are pushed +dataset_path: bigbench # will switch to `hails/bigbench` when all tasks are pushed output_type: generate_until dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods -- GitLab From 6998762aef308e58d332c1e939fd8ae2b9c43391 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Thu, 9 Nov 2023 07:19:32 +0000 Subject: [PATCH 11/50] merged cont-metrics here --- lm_eval/api/metrics.py | 6 +++--- lm_eval/evaluator.py | 2 ++ .../full_continuation/style_01/_template_yaml | 4 +--- .../full_continuation/style_02/_template_yaml | 4 +--- .../full_continuation/style_03/_template_yaml | 4 +--- .../full_continuation/style_04/_template_yaml | 4 +--- .../full_continuation/style_05/_template_yaml | 4 +--- .../alternative_worlds/letters_only/style_01/_template_yaml | 4 +--- .../alternative_worlds/letters_only/style_02/_template_yaml | 4 +--- .../alternative_worlds/letters_only/style_03/_template_yaml | 4 +--- .../alternative_worlds/letters_only/style_04/_template_yaml | 4 +--- .../alternative_worlds/letters_only/style_05/_template_yaml | 4 +--- 12 files changed, 15 insertions(+), 33 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index be4d6f0b..3aea5840 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -109,9 +109,9 @@ def ter(items): @register_aggregation("brier_score") def brier_score(items): # This is a passthrough function gold, predictions = list(zip(*items)) - gold = list(gold) - gold_one_hot = np.eye(np.max(gold) + 1)[gold] - 
predictions = list(zip(*items))[1] + gold = np.array(gold) + predictions = np.array(predictions) + gold_one_hot = np.eye(len(predictions[0]))[gold] return np.mean(np.sum((predictions - gold_one_hot) ** 2, axis=1)) diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index 3fa9633e..ec42ba77 100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -468,6 +468,8 @@ def evaluate( if stderr is not None: results[task_name][metric + "_stderr" + "," + key] = stderr(items) + else: + results[task_name][metric + "_stderr" + "," + key] = 0 if bool(results): diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml index 1a5cb4cb..a42ebf3d 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml index 396cf875..5ba05e51 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml index 074aeef0..1f7f90cf 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml index 250705e9..86296725 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml index 5c437352..e61f63e3 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml index 98d15de6..5d605a67 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml index 565c4661..9f444a5c 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml index f73e43d3..0c9200d8 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml index 471aacac..c0dfb525 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml index a95d6de5..cf0c1ed3 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml @@ -10,6 +10,4 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: 
mean - higher_is_better: true + - metric: brier_score -- GitLab From 5e4f17990f2c4d7a1205d4d8fac8ace23bb75ae7 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 14 Nov 2023 11:48:09 +0000 Subject: [PATCH 12/50] brier score for loglikelihood task --- lm_eval/api/task.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index c35808d7..0c486547 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -960,9 +960,16 @@ class ConfigurableTask(Task): if self.OUTPUT_TYPE == "loglikelihood": results = results[0] ll, is_greedy = results + prob_norm = np.exp(ll) + return { **({"perplexity": ll} if "perplexity" in use_metric else {}), **({"acc": int(is_greedy)} if "acc" in use_metric else {}), + **( + {"brier_score": (0, [prob_norm])} # Gold is Index 0 + if "brier_score" in use_metric + else {} + ), } elif self.OUTPUT_TYPE == "loglikelihood_rolling": (loglikelihood,) = results -- GitLab From c25f6a3188c1c6b519a718b0ee8f4b33222cde50 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 14 Nov 2023 11:48:32 +0000 Subject: [PATCH 13/50] alternative prompts for arithmetic --- .../alternative_worlds/_template_yaml | 13 +++++++++ .../alternative_worlds/arithmetic_alt.yaml | 7 +++++ .../style_01/_template_01_yaml | 15 ++++++++++ .../style_01/arithmetic_1dc.yaml | 3 ++ .../style_01/arithmetic_2da.yaml | 4 +++ .../style_01/arithmetic_2dm.yaml | 4 +++ .../style_01/arithmetic_2ds.yaml | 4 +++ .../style_01/arithmetic_3da.yaml | 4 +++ .../style_01/arithmetic_3ds.yaml | 4 +++ .../style_01/arithmetic_4da.yaml | 4 +++ .../style_01/arithmetic_4ds.yaml | 4 +++ .../style_01/arithmetic_5da.yaml | 4 +++ .../style_01/arithmetic_5ds.yaml | 4 +++ .../style_02/_template_02_yaml | 15 ++++++++++ .../style_02/arithmetic_1dc.yaml | 4 +++ .../style_02/arithmetic_2da.yaml | 4 +++ .../style_02/arithmetic_2dm.yaml | 4 +++ .../style_02/arithmetic_2ds.yaml | 4 +++ .../style_02/arithmetic_3da.yaml | 4 +++ .../style_02/arithmetic_3ds.yaml | 4 +++ 
.../style_02/arithmetic_4da.yaml | 4 +++ .../style_02/arithmetic_4ds.yaml | 4 +++ .../style_02/arithmetic_5da.yaml | 4 +++ .../style_02/arithmetic_5ds.yaml | 4 +++ .../style_03/_template_03_yaml | 15 ++++++++++ .../style_03/arithmetic_1dc.yaml | 4 +++ .../style_03/arithmetic_2da.yaml | 4 +++ .../style_03/arithmetic_2dm.yaml | 4 +++ .../style_03/arithmetic_2ds.yaml | 4 +++ .../style_03/arithmetic_3da.yaml | 4 +++ .../style_03/arithmetic_3ds.yaml | 4 +++ .../style_03/arithmetic_4da.yaml | 4 +++ .../style_03/arithmetic_4ds.yaml | 4 +++ .../style_03/arithmetic_5da.yaml | 4 +++ .../style_03/arithmetic_5ds.yaml | 4 +++ .../style_04/_template_04_yaml | 15 ++++++++++ .../style_04/arithmetic_1dc.yaml | 4 +++ .../style_04/arithmetic_2da.yaml | 4 +++ .../style_04/arithmetic_2dm.yaml | 4 +++ .../style_04/arithmetic_2ds.yaml | 4 +++ .../style_04/arithmetic_3da.yaml | 4 +++ .../style_04/arithmetic_3ds.yaml | 4 +++ .../style_04/arithmetic_4da.yaml | 4 +++ .../style_04/arithmetic_4ds.yaml | 4 +++ .../style_04/arithmetic_5da.yaml | 4 +++ .../style_04/arithmetic_5ds.yaml | 4 +++ .../style_05/_template_05_yaml | 15 ++++++++++ .../style_05/arithmetic_1dc.yaml | 4 +++ .../style_05/arithmetic_2da.yaml | 4 +++ .../style_05/arithmetic_2dm.yaml | 4 +++ .../style_05/arithmetic_2ds.yaml | 4 +++ .../style_05/arithmetic_3da.yaml | 4 +++ .../style_05/arithmetic_3ds.yaml | 4 +++ .../style_05/arithmetic_4da.yaml | 4 +++ .../style_05/arithmetic_4ds.yaml | 4 +++ .../style_05/arithmetic_5da.yaml | 4 +++ .../style_05/arithmetic_5ds.yaml | 4 +++ .../arithmetic/alternative_worlds/utils.py | 29 +++++++++++++++++++ 58 files changed, 323 insertions(+) create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml create mode 
100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml create mode 100644 
lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml create mode 100644 
lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml create mode 100644 lm_eval/tasks/arithmetic/alternative_worlds/utils.py diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml new file mode 100644 index 00000000..16a5177e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml @@ -0,0 +1,13 @@ +dataset_path: EleutherAI/arithmetic +dataset_name: arithmetic_1dc +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: "{{context}}" +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml new file mode 100644 index 00000000..85584460 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml @@ -0,0 +1,7 @@ +group: 
arithmetic_alt +task: + - arithmetic_alt_01 + - arithmetic_alt_02 + - arithmetic_alt_03 + - arithmetic_alt_04 + - arithmetic_alt_05 \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml new file mode 100644 index 00000000..60a1ee13 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_01 +group_alias: arithmetic (Style 01) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_01 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml new file mode 100644 index 00000000..7d51e94c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml @@ -0,0 +1,3 @@ +include: _template_01_yaml +task: arithmetic_1dc_alt_01 +dataset_name: arithmetic_1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml new file mode 100644 index 00000000..2b3f8bd4 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2da_alt_01 +dataset_name: arithmetic_2da +task_alias: 2da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml new file mode 100644 index 00000000..92b0521a --- /dev/null +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2dm_alt_01 +dataset_name: arithmetic_2dm +task_alias: 2dm \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml new file mode 100644 index 00000000..b44a5556 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_2ds_alt_01 +dataset_name: arithmetic_2ds +task_alias: 2ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml new file mode 100644 index 00000000..7f8e8b2c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_3da_alt_01 +dataset_name: arithmetic_3da +task_alias: 3da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml new file mode 100644 index 00000000..eb604704 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_3ds_alt_01 +dataset_name: arithmetic_3ds +task_alias: 3ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml new file mode 100644 index 00000000..abad02c2 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_4da_alt_01 +dataset_name: arithmetic_4da +task_alias: 4da \ No newline 
at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml new file mode 100644 index 00000000..0b022c46 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_4ds_alt_01 +dataset_name: arithmetic_4ds +task_alias: 4ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml new file mode 100644 index 00000000..21e28815 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_5da_alt_01 +dataset_name: arithmetic_5da +task_alias: 5da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml new file mode 100644 index 00000000..d3622a86 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_01_yaml +task: arithmetic_5ds_alt_01 +dataset_name: arithmetic_5ds +task_alias: 5ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml new file mode 100644 index 00000000..8db3ca7c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_02 +group_alias: arithmetic (Style 02) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_02 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + 
higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml new file mode 100644 index 00000000..33f2b064 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_1dc_alt_02 +dataset_name: arithmetic_1dc +task_alias: 1dc \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml new file mode 100644 index 00000000..4cf1304e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2da_alt_02 +dataset_name: arithmetic_2da +task_alias: 2da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml new file mode 100644 index 00000000..41ceaa84 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2dm_alt_02 +dataset_name: arithmetic_2dm +task_alias: 2dm \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml new file mode 100644 index 00000000..dbbd41dc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_2ds_alt_02 +dataset_name: arithmetic_2ds +task_alias: 2ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml new file mode 100644 index 00000000..e39181e8 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_3da_alt_02 +dataset_name: arithmetic_3da +task_alias: 3da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml new file mode 100644 index 00000000..5e643bcb --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_3ds_alt_02 +dataset_name: arithmetic_3ds +task_alias: 3ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml new file mode 100644 index 00000000..a57fbdff --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_4da_alt_02 +dataset_name: arithmetic_4da +task_alias: 4da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml new file mode 100644 index 00000000..baef145d --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_4ds_alt_02 +dataset_name: arithmetic_4ds +task_alias: 4ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml new file mode 100644 index 00000000..f35f2c56 --- /dev/null +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_5da_alt_02 +dataset_name: arithmetic_5da +task_alias: 5da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml new file mode 100644 index 00000000..8932c906 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_02_yaml +task: arithmetic_5ds_alt_02 +dataset_name: arithmetic_5ds +task_alias: 5ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml new file mode 100644 index 00000000..7e9862e9 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_03 +group_alias: arithmetic (Style 03) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_03 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml new file mode 100644 index 00000000..1dbf243f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_1dc_alt_03 +dataset_name: arithmetic_1dc +task_alias: 1dc \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml new file mode 100644 index 00000000..c2d8b41a --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2da_alt_03 +dataset_name: arithmetic_2da +task_alias: 2da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml new file mode 100644 index 00000000..4e7a986e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2dm_alt_03 +dataset_name: arithmetic_2dm +task_alias: 2dm \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml new file mode 100644 index 00000000..626d1d93 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_2ds_alt_03 +dataset_name: arithmetic_2ds +task_alias: 2ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml new file mode 100644 index 00000000..4dac4f85 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_3da_alt_03 +dataset_name: arithmetic_3da +task_alias: 3da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml new file mode 100644 index 00000000..b8ccc17f --- /dev/null +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_3ds_alt_03 +dataset_name: arithmetic_3ds +task_alias: 3ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml new file mode 100644 index 00000000..b4312343 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_4da_alt_03 +dataset_name: arithmetic_4da +task_alias: 4da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml new file mode 100644 index 00000000..6a6e173c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_4ds_alt_03 +dataset_name: arithmetic_4ds +task_alias: 4ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml new file mode 100644 index 00000000..6fff06bc --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_5da_alt_03 +dataset_name: arithmetic_5da +task_alias: 5da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml new file mode 100644 index 00000000..0d52562f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_03_yaml +task: arithmetic_5ds_alt_03 +dataset_name: arithmetic_5ds +task_alias: 5ds \ No newline 
at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml new file mode 100644 index 00000000..b21fa53c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_04 +group_alias: arithmetic (Style 04) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_04 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml new file mode 100644 index 00000000..b9dd9b44 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_1dc_alt_04 +dataset_name: arithmetic_1dc +task_alias: 1dc \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml new file mode 100644 index 00000000..3a45ade3 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_2da_alt_04 +dataset_name: arithmetic_2da +task_alias: 2da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml new file mode 100644 index 00000000..d2ef1777 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml 
+task: arithmetic_2dm_alt_04 +dataset_name: arithmetic_2dm +task_alias: 2dm \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml new file mode 100644 index 00000000..9bd31b5e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_2ds_alt_04 +dataset_name: arithmetic_2ds +task_alias: 2ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml new file mode 100644 index 00000000..c72f5526 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_3da_alt_04 +dataset_name: arithmetic_3da +task_alias: 3da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml new file mode 100644 index 00000000..94cf4666 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_3ds_alt_04 +dataset_name: arithmetic_3ds +task_alias: 3ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml new file mode 100644 index 00000000..00031d76 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_4da_alt_04 +dataset_name: arithmetic_4da +task_alias: 4da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml new file mode 100644 index 00000000..95dd6f2d --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_4ds_alt_04 +dataset_name: arithmetic_4ds +task_alias: 4ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml new file mode 100644 index 00000000..6a667d33 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_5da_alt_04 +dataset_name: arithmetic_5da +task_alias: 5da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml new file mode 100644 index 00000000..afa3996f --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_04_yaml +task: arithmetic_5ds_alt_04 +dataset_name: arithmetic_5ds +task_alias: 5ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml new file mode 100644 index 00000000..c6c06737 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml @@ -0,0 +1,15 @@ +include: ../_template_yaml +group: arithmetic_alt_05 +group_alias: arithmetic (Style 05) +dataset_path: EleutherAI/arithmetic +output_type: loglikelihood +validation_split: validation +test_split: null +doc_to_text: !function ../utils.style_05 +doc_to_target: "{{completion}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false \ No newline at end of file 
diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml new file mode 100644 index 00000000..3b98b8c7 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_1dc_alt_05 +dataset_name: arithmetic_1dc +task_alias: 1dc \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml new file mode 100644 index 00000000..e53560b3 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2da_alt_05 +dataset_name: arithmetic_2da +task_alias: 2da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml new file mode 100644 index 00000000..ab0be4b8 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2dm_alt_05 +dataset_name: arithmetic_2dm +task_alias: 2dm \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml new file mode 100644 index 00000000..e95734e5 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_2ds_alt_05 +dataset_name: arithmetic_2ds +task_alias: 2ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml new file mode 100644 index 00000000..3052c7dc 
--- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_3da_alt_05 +dataset_name: arithmetic_3da +task_alias: 3da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml new file mode 100644 index 00000000..050dae04 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_3ds_alt_05 +dataset_name: arithmetic_3ds +task_alias: 3ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml new file mode 100644 index 00000000..5d764d26 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_4da_alt_05 +dataset_name: arithmetic_4da +task_alias: 4da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml new file mode 100644 index 00000000..3915be0e --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_4ds_alt_05 +dataset_name: arithmetic_4ds +task_alias: 4ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml new file mode 100644 index 00000000..9ede053c --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_5da_alt_05 +dataset_name: arithmetic_5da 
+task_alias: 5da \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml new file mode 100644 index 00000000..7841b717 --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml @@ -0,0 +1,4 @@ +include: _template_05_yaml +task: arithmetic_5ds_alt_05 +dataset_name: arithmetic_5ds +task_alias: 5ds \ No newline at end of file diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py new file mode 100644 index 00000000..f8cd32db --- /dev/null +++ b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py @@ -0,0 +1,29 @@ +import re + +# Original Prompt +# Question: What is (9 + 8) * 2? Answer: + +def style_01(docs): + + # What is (9 + 8) * 2? + return docs["context"].replace("Question: ", "").replace(" Answer:", "") + +def style_02(docs): + + # Q: What is (9 + 8) * 2? A: + return docs["context"].replace("Question: ", "Q: ").replace(" Answer:", " A:") + +def style_03(docs): + + # Solve (9 + 8) * 2. + return docs["context"].replace("Question: What is", "Solve").replace(" Answer:", ".") + +def style_04(docs): + + # (9 + 8) * 2 = + return docs["context"].replace("Question: What is ", "").replace(" Answer:", " =") + +def style_05(docs): + + # What is (9 + 8) * 2? 
Answer: + return docs["context"].replace("Question: ", "") \ No newline at end of file -- GitLab From f8740ff4d194c90453065befa3c22b7381244dd1 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Tue, 14 Nov 2023 13:35:50 +0000 Subject: [PATCH 14/50] add task_alias --- .../arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml index 7d51e94c..0134f80f 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml @@ -1,3 +1,4 @@ include: _template_01_yaml task: arithmetic_1dc_alt_01 dataset_name: arithmetic_1dc +task_alias: 1dc \ No newline at end of file -- GitLab From 7cd1555e5291b68c7309ac52ea09912a847c5f4f Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 17 Nov 2023 03:40:47 +0000 Subject: [PATCH 15/50] added alternative prompts for hellaswag --- .../hellaswag/alternative_worlds/README.md | 20 +++++ .../alternative_worlds/_hellaswag_alt_yaml | 16 ++++ .../alternative_worlds/hellaswag_alt.yaml | 10 +++ .../alternative_worlds/style_01/a.yaml | 7 ++ .../alternative_worlds/style_01/b.yaml | 7 ++ .../alternative_worlds/style_01/c.yaml | 7 ++ .../alternative_worlds/style_02/a.yaml | 7 ++ .../alternative_worlds/style_02/b.yaml | 7 ++ .../alternative_worlds/style_02/c.yaml | 7 ++ .../alternative_worlds/style_03/a.yaml | 7 ++ .../alternative_worlds/style_03/b.yaml | 7 ++ .../alternative_worlds/style_03/c.yaml | 7 ++ .../alternative_worlds/style_04/a.yaml | 7 ++ .../alternative_worlds/style_04/b.yaml | 7 ++ .../alternative_worlds/style_04/c.yaml | 7 ++ .../alternative_worlds/style_05/a.yaml | 7 ++ .../alternative_worlds/style_05/b.yaml | 7 ++ .../alternative_worlds/style_05/c.yaml | 7 ++ .../alternative_worlds/style_06/a.yaml | 7 ++ 
.../alternative_worlds/style_06/b.yaml | 7 ++ .../alternative_worlds/style_06/c.yaml | 7 ++ .../alternative_worlds/style_07/a.yaml | 7 ++ .../alternative_worlds/style_07/b.yaml | 7 ++ .../alternative_worlds/style_07/c.yaml | 7 ++ .../alternative_worlds/style_08/a.yaml | 7 ++ .../alternative_worlds/style_08/b.yaml | 7 ++ .../alternative_worlds/style_08/c.yaml | 7 ++ .../hellaswag/alternative_worlds/styles.py | 87 +++++++++++++++++++ lm_eval/tasks/hellaswag/hellaswag.yaml | 2 +- 29 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/README.md create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml create mode 100644 
lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml create mode 100644 lm_eval/tasks/hellaswag/alternative_worlds/styles.py diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/README.md b/lm_eval/tasks/hellaswag/alternative_worlds/README.md new file mode 100644 index 00000000..a9f58e69 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate affect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml b/lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml new file mode 100644 index 00000000..cbd45058 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/_hellaswag_alt_yaml @@ -0,0 +1,16 @@ +dataset_path: Rowan/hellaswag +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: null +process_docs: !function ../utils.process_docs +doc_to_text: "{{query}}" +doc_to_target: "{{label}}" +doc_to_choice: "{{choices}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml new file mode 100644 index 00000000..c60df999 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/hellaswag_alt.yaml @@ -0,0 +1,10 @@ +group: hellaswag_alt +task: + - hellaswag_01 + - hellaswag_02 + - hellaswag_03 + - hellaswag_04 + - hellaswag_05 + - hellaswag_06 + - hellaswag_07 + - hellaswag_08 diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml new file mode 100644 index 00000000..dc363efa --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01a +task_alias: a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml new file mode 100644 index 
00000000..8049df97 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01b +task_alias: b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml new file mode 100644 index 00000000..62ed4949 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_01 +group_alias: style_01 +task: hellaswag_01c +task_alias: c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml new file mode 100644 index 00000000..328be078 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02a +task_alias: a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml new file mode 100644 index 00000000..73d01b56 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02b +task_alias: b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml 
b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml new file mode 100644 index 00000000..32a8d8d5 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_02 +group_alias: style_02 +task: hellaswag_02c +task_alias: c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml new file mode 100644 index 00000000..407b84de --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03a +task_alias: a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml new file mode 100644 index 00000000..ef1f6127 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03b +task_alias: b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml new file mode 100644 index 00000000..1e7edecb --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_03 +group_alias: style_03 +task: hellaswag_03c +task_alias: c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c \ No newline at end of file diff --git 
a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml new file mode 100644 index 00000000..56cef2bf --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04a +task_alias: a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml new file mode 100644 index 00000000..04bb9397 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04b +task_alias: b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml new file mode 100644 index 00000000..aee06df7 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_04 +group_alias: style_04 +task: hellaswag_04c +task_alias: c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml new file mode 100644 index 00000000..d7709b4e --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05a +task_alias: a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a \ No 
newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml new file mode 100644 index 00000000..a0ed5a99 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05b +task_alias: b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml new file mode 100644 index 00000000..da95c1e1 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_05 +group_alias: style_05 +task: hellaswag_05c +task_alias: c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml new file mode 100644 index 00000000..fd438dbf --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06a +task_alias: a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml new file mode 100644 index 00000000..e7bf0051 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06b +task_alias: b +doc_to_text: !function ../styles.template_06 +doc_to_choice: 
!function ../styles.choice_06b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml new file mode 100644 index 00000000..a93af76b --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_06 +group_alias: style_06 +task: hellaswag_06c +task_alias: c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml new file mode 100644 index 00000000..391cad9c --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07a +task_alias: a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml new file mode 100644 index 00000000..d4547f29 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07b +task_alias: b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml new file mode 100644 index 00000000..223e60e2 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_07 +group_alias: style_07 +task: hellaswag_07c +task_alias: c +doc_to_text: !function 
../styles.template_07 +doc_to_choice: !function ../styles.choice_07c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml new file mode 100644 index 00000000..6fc8768f --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08a +task_alias: a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml new file mode 100644 index 00000000..dd8b9762 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08b +task_alias: b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml new file mode 100644 index 00000000..d92a8465 --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml @@ -0,0 +1,7 @@ +include: ../_hellaswag_alt_yaml +group: hellaswag_08 +group_alias: style_08 +task: hellaswag_08c +task_alias: c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c \ No newline at end of file diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/styles.py b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py new file mode 100644 index 00000000..204465fb --- /dev/null +++ b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py @@ -0,0 +1,87 @@ +import string +from functools import partial + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"] 
+ num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n\n".join([ + doc["query"]+"...", + " What is the most appropriate continuation?", + ] + [ + choice_string.format(i,j) for i,j in zip(letter_list, choices) + ] + ) + + return doc_to_text + +# Full continuation +def choice_A(doc): + return doc["choices"] + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t","") for letter in letter_list] + + return letter_list + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter+" " for letter in letter_list] + + return [letter+choice for letter, choice in zip(letter_list, doc["choices"])] + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = 
partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") + + diff --git a/lm_eval/tasks/hellaswag/hellaswag.yaml b/lm_eval/tasks/hellaswag/hellaswag.yaml index 0ca82d2a..a412098b 100644 --- a/lm_eval/tasks/hellaswag/hellaswag.yaml +++ b/lm_eval/tasks/hellaswag/hellaswag.yaml @@ -1,7 +1,7 @@ group: - multiple_choice task: hellaswag -dataset_path: hellaswag +dataset_path: Rowan/hellaswag dataset_name: null output_type: multiple_choice training_split: train -- GitLab From bbd6ab3af23a8a862f35e94625eb07e14e10d40d Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 17 Nov 2023 03:42:26 +0000 Subject: [PATCH 16/50] added alternative prompts for mathqa --- .../tasks/mathqa/alternative_worlds/README.md | 20 ++++ .../alternative_worlds/_mathqa_alt_yaml | 12 +++ .../mathqa/alternative_worlds/mathqa_alt.yaml | 10 ++ .../mathqa/alternative_worlds/style_01/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_01/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_01/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_02/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_02/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_02/c.yaml | 7 ++ 
.../mathqa/alternative_worlds/style_03/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_03/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_03/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_04/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_04/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_04/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_05/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_05/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_05/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_06/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_06/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_06/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_07/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_07/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_07/c.yaml | 7 ++ .../mathqa/alternative_worlds/style_08/a.yaml | 7 ++ .../mathqa/alternative_worlds/style_08/b.yaml | 7 ++ .../mathqa/alternative_worlds/style_08/c.yaml | 7 ++ .../tasks/mathqa/alternative_worlds/styles.py | 93 +++++++++++++++++++ 28 files changed, 303 insertions(+) create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/README.md create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml create mode 100644 
lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/styles.py diff --git a/lm_eval/tasks/mathqa/alternative_worlds/README.md b/lm_eval/tasks/mathqa/alternative_worlds/README.md new file mode 100644 index 00000000..a9f58e69 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate affect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml b/lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml new file mode 100644 index 00000000..02f098af --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml @@ -0,0 +1,12 @@ +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml b/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml new file mode 100644 index 00000000..e2d308ab --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml @@ -0,0 +1,10 @@ +group: mathqa_alt +task: + - mathqa_01 + - mathqa_02 + - mathqa_03 + - mathqa_04 + - mathqa_05 + - mathqa_06 + - mathqa_07 + - mathqa_08 diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml new file mode 100644 index 00000000..a0d9d97e --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_01 +group_alias: style_01 +task: mathqa_01a +task_alias: a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml new file mode 100644 index 00000000..7dfc3c7a --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_01 +group_alias: style_01 +task: 
mathqa_01b +task_alias: b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml new file mode 100644 index 00000000..d2e34371 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_01 +group_alias: style_01 +task: mathqa_01c +task_alias: c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml new file mode 100644 index 00000000..761dd38a --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_02 +group_alias: style_02 +task: mathqa_02a +task_alias: a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml new file mode 100644 index 00000000..04c89fb7 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_02 +group_alias: style_02 +task: mathqa_02b +task_alias: b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml new file mode 100644 index 00000000..8a64ecfb --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_02 +group_alias: style_02 +task: mathqa_02c +task_alias: c +doc_to_text: 
!function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml new file mode 100644 index 00000000..07ab19e8 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_03 +group_alias: style_03 +task: mathqa_03a +task_alias: a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml new file mode 100644 index 00000000..d4b36dd5 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_03 +group_alias: style_03 +task: mathqa_03b +task_alias: b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml new file mode 100644 index 00000000..af3db476 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_03 +group_alias: style_03 +task: mathqa_03c +task_alias: c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml new file mode 100644 index 00000000..b2727c34 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_04 +group_alias: style_04 +task: mathqa_04a +task_alias: a +doc_to_text: !function ../styles.template_04 
+doc_to_choice: !function ../styles.choice_04a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml new file mode 100644 index 00000000..0871ce9a --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_04 +group_alias: style_04 +task: mathqa_04b +task_alias: b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml new file mode 100644 index 00000000..28f8a443 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_04 +group_alias: style_04 +task: mathqa_04c +task_alias: c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml new file mode 100644 index 00000000..83bff053 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_05 +group_alias: style_05 +task: mathqa_05a +task_alias: a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml new file mode 100644 index 00000000..1b412bf7 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_05 +group_alias: style_05 +task: mathqa_05b +task_alias: b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function 
../styles.choice_05b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml new file mode 100644 index 00000000..b49b6b2f --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_05 +group_alias: style_05 +task: mathqa_05c +task_alias: c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml new file mode 100644 index 00000000..b3a69d31 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_06 +group_alias: style_06 +task: mathqa_06a +task_alias: a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml new file mode 100644 index 00000000..e72dab31 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_06 +group_alias: style_06 +task: mathqa_06b +task_alias: b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml new file mode 100644 index 00000000..9b4ecfda --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_06 +group_alias: style_06 +task: mathqa_06c +task_alias: c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c \ No newline at 
end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml new file mode 100644 index 00000000..5b96ba17 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_07 +group_alias: style_07 +task: mathqa_07a +task_alias: a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml new file mode 100644 index 00000000..54713bfb --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_07 +group_alias: style_07 +task: mathqa_07b +task_alias: b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml new file mode 100644 index 00000000..0d90b216 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_07 +group_alias: style_07 +task: mathqa_07c +task_alias: c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml new file mode 100644 index 00000000..dc092c2d --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_08 +group_alias: style_08 +task: mathqa_08a +task_alias: a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a \ No newline at end of file diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml new file mode 100644 index 00000000..1f51be56 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_08 +group_alias: style_08 +task: mathqa_08b +task_alias: b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml new file mode 100644 index 00000000..3ea0434b --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml @@ -0,0 +1,7 @@ +include: ../_mathqa_alt_yaml +group: mathqa_08 +group_alias: style_08 +task: mathqa_08c +task_alias: c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c \ No newline at end of file diff --git a/lm_eval/tasks/mathqa/alternative_worlds/styles.py b/lm_eval/tasks/mathqa/alternative_worlds/styles.py new file mode 100644 index 00000000..15c93d00 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/styles.py @@ -0,0 +1,93 @@ +import re +import string +from functools import partial + +def parse_choices(doc): + choices = [ + c[4:].rstrip(" ,") + for c in re.findall(r"[abcd] \) .*?, |e \) .*?$", doc["options"]) + ] + return choices + +def doc_to_text_base(alphabet, style, doc): + + choices = parse_choices(doc) + + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + doc_to_text = "\n\n".join( + [doc["Problem"]] + [ + choice_string.format(i,j) for i,j in zip(letter_list, choices) + ] + ) + + return doc_to_text + +# Full continuation +def choice_A(doc): + return parse_choices(doc) + +# Letters only +def choice_B(alphabet, style, doc): + + choices = parse_choices(doc) + num = 
len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t","") for letter in letter_list] + + return letter_list + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = parse_choices(doc) + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter+" " for letter in letter_list] + + return [letter+choice for letter, choice in zip(letter_list, choices)] + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = 
partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") + + -- GitLab From 4acb339e410600d4bf3ed8352a8080d8e3b6f43e Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 6 Dec 2023 08:10:59 +0000 Subject: [PATCH 17/50] fixed brier score to accomodate samples with different number of choices --- lm_eval/api/metrics.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 32e7595d..2596da2a 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -1,6 +1,6 @@ import math from collections.abc import Iterable - +from collections import defaultdict import numpy as np import sacrebleu import sklearn.metrics @@ -111,13 +111,26 @@ def ter(items): @register_aggregation("brier_score") def brier_score(items): # This is a passthrough function - gold, predictions = list(zip(*items)) - print(type(predictions)) - predictions = np.array(predictions) - print(predictions.shape) - gold = np.array(gold) - gold_one_hot = np.eye(len(predictions[0]))[gold] - return np.mean(np.sum((predictions - gold_one_hot) ** 2, axis=1)) + + # Certain datasets like arc_easy can have a different number of choices. 
+ golds, predictions = list(zip(*items)) + + pred_group = defaultdict(list) + gold_group = defaultdict(list) + for gold, pred in zip(golds, predictions): + pred_group[len(pred)].append(pred) + gold_group[len(pred)].append(gold) + + total_size = 0 + average = 0 + for g, p in zip(gold_group.values(), pred_group.values()): + _p = np.array(p) + _g = np.array(g) + _g_one_hot = np.eye(len(_p[0]))[_g] + average += np.mean(np.sum((_p - _g_one_hot) ** 2, axis=1))*len(_g) + total_size += len(_g) + + return average/total_size @register_metric( -- GitLab From c84190cbd1a0920b50faabd816e9b06bf427878c Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 6 Dec 2023 08:21:27 +0000 Subject: [PATCH 18/50] add aggregation --- lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml index 1c55ee99..633826a2 100644 --- a/lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml +++ b/lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml @@ -20,4 +20,5 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score + aggregation: brier_score higher_is_better: false -- GitLab From 50df117bffc3a87c6e33b46dd315df2b7ec096c1 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 6 Dec 2023 08:21:49 +0000 Subject: [PATCH 19/50] removed mmlu altworlds --- .../full_continuation/style_01/_template_yaml | 13 ------------- .../style_01/style_01_abstract_algebra.yaml | 4 ---- .../style_01/style_01_anatomy.yaml | 4 ---- .../style_01/style_01_astronomy.yaml | 4 ---- .../style_01/style_01_business_ethics.yaml | 4 ---- .../style_01/style_01_clinical_knowledge.yaml | 4 ---- .../style_01/style_01_college_biology.yaml | 4 ---- .../style_01/style_01_college_chemistry.yaml | 4 ---- .../style_01/style_01_college_computer_science.yaml | 4 ---- .../style_01/style_01_college_mathematics.yaml | 4 ---- 
.../style_01/style_01_college_medicine.yaml | 4 ---- .../style_01/style_01_college_physics.yaml | 4 ---- .../style_01/style_01_computer_security.yaml | 4 ---- .../style_01/style_01_conceptual_physics.yaml | 4 ---- .../style_01/style_01_econometrics.yaml | 4 ---- .../style_01/style_01_electrical_engineering.yaml | 4 ---- .../style_01/style_01_elementary_mathematics.yaml | 4 ---- .../style_01/style_01_formal_logic.yaml | 4 ---- .../style_01/style_01_global_facts.yaml | 4 ---- .../style_01/style_01_high_school_biology.yaml | 4 ---- .../style_01/style_01_high_school_chemistry.yaml | 4 ---- .../style_01_high_school_computer_science.yaml | 4 ---- .../style_01_high_school_european_history.yaml | 4 ---- .../style_01/style_01_high_school_geography.yaml | 4 ---- ...tyle_01_high_school_government_and_politics.yaml | 4 ---- .../style_01_high_school_macroeconomics.yaml | 4 ---- .../style_01/style_01_high_school_mathematics.yaml | 4 ---- .../style_01_high_school_microeconomics.yaml | 4 ---- .../style_01/style_01_high_school_physics.yaml | 4 ---- .../style_01/style_01_high_school_psychology.yaml | 4 ---- .../style_01/style_01_high_school_statistics.yaml | 4 ---- .../style_01/style_01_high_school_us_history.yaml | 4 ---- .../style_01_high_school_world_history.yaml | 4 ---- .../style_01/style_01_human_aging.yaml | 4 ---- .../style_01/style_01_human_sexuality.yaml | 4 ---- .../style_01/style_01_international_law.yaml | 4 ---- .../style_01/style_01_jurisprudence.yaml | 4 ---- .../style_01/style_01_logical_fallacies.yaml | 4 ---- .../style_01/style_01_machine_learning.yaml | 4 ---- .../style_01/style_01_management.yaml | 4 ---- .../style_01/style_01_marketing.yaml | 4 ---- .../style_01/style_01_medical_genetics.yaml | 4 ---- .../style_01/style_01_miscellaneous.yaml | 4 ---- .../style_01/style_01_moral_disputes.yaml | 4 ---- .../style_01/style_01_moral_scenarios.yaml | 4 ---- .../style_01/style_01_nutrition.yaml | 4 ---- .../style_01/style_01_philosophy.yaml | 4 ---- 
.../style_01/style_01_prehistory.yaml | 4 ---- .../style_01/style_01_professional_accounting.yaml | 4 ---- .../style_01/style_01_professional_law.yaml | 4 ---- .../style_01/style_01_professional_medicine.yaml | 4 ---- .../style_01/style_01_professional_psychology.yaml | 4 ---- .../style_01/style_01_public_relations.yaml | 4 ---- .../style_01/style_01_security_studies.yaml | 4 ---- .../style_01/style_01_sociology.yaml | 4 ---- .../style_01/style_01_us_foreign_policy.yaml | 4 ---- .../style_01/style_01_virology.yaml | 4 ---- .../style_01/style_01_world_religions.yaml | 4 ---- .../full_continuation/style_02/_template_yaml | 13 ------------- .../style_02/style_02_abstract_algebra.yaml | 4 ---- .../style_02/style_02_anatomy.yaml | 4 ---- .../style_02/style_02_astronomy.yaml | 4 ---- .../style_02/style_02_business_ethics.yaml | 4 ---- .../style_02/style_02_clinical_knowledge.yaml | 4 ---- .../style_02/style_02_college_biology.yaml | 4 ---- .../style_02/style_02_college_chemistry.yaml | 4 ---- .../style_02/style_02_college_computer_science.yaml | 4 ---- .../style_02/style_02_college_mathematics.yaml | 4 ---- .../style_02/style_02_college_medicine.yaml | 4 ---- .../style_02/style_02_college_physics.yaml | 4 ---- .../style_02/style_02_computer_security.yaml | 4 ---- .../style_02/style_02_conceptual_physics.yaml | 4 ---- .../style_02/style_02_econometrics.yaml | 4 ---- .../style_02/style_02_electrical_engineering.yaml | 4 ---- .../style_02/style_02_elementary_mathematics.yaml | 4 ---- .../style_02/style_02_formal_logic.yaml | 4 ---- .../style_02/style_02_global_facts.yaml | 4 ---- .../style_02/style_02_high_school_biology.yaml | 4 ---- .../style_02/style_02_high_school_chemistry.yaml | 4 ---- .../style_02_high_school_computer_science.yaml | 4 ---- .../style_02_high_school_european_history.yaml | 4 ---- .../style_02/style_02_high_school_geography.yaml | 4 ---- ...tyle_02_high_school_government_and_politics.yaml | 4 ---- .../style_02_high_school_macroeconomics.yaml | 4 ---- 
.../style_02/style_02_high_school_mathematics.yaml | 4 ---- .../style_02_high_school_microeconomics.yaml | 4 ---- .../style_02/style_02_high_school_physics.yaml | 4 ---- .../style_02/style_02_high_school_psychology.yaml | 4 ---- .../style_02/style_02_high_school_statistics.yaml | 4 ---- .../style_02/style_02_high_school_us_history.yaml | 4 ---- .../style_02_high_school_world_history.yaml | 4 ---- .../style_02/style_02_human_aging.yaml | 4 ---- .../style_02/style_02_human_sexuality.yaml | 4 ---- .../style_02/style_02_international_law.yaml | 4 ---- .../style_02/style_02_jurisprudence.yaml | 4 ---- .../style_02/style_02_logical_fallacies.yaml | 4 ---- .../style_02/style_02_machine_learning.yaml | 4 ---- .../style_02/style_02_management.yaml | 4 ---- .../style_02/style_02_marketing.yaml | 4 ---- .../style_02/style_02_medical_genetics.yaml | 4 ---- .../style_02/style_02_miscellaneous.yaml | 4 ---- .../style_02/style_02_moral_disputes.yaml | 4 ---- .../style_02/style_02_moral_scenarios.yaml | 4 ---- .../style_02/style_02_nutrition.yaml | 4 ---- .../style_02/style_02_philosophy.yaml | 4 ---- .../style_02/style_02_prehistory.yaml | 4 ---- .../style_02/style_02_professional_accounting.yaml | 4 ---- .../style_02/style_02_professional_law.yaml | 4 ---- .../style_02/style_02_professional_medicine.yaml | 4 ---- .../style_02/style_02_professional_psychology.yaml | 4 ---- .../style_02/style_02_public_relations.yaml | 4 ---- .../style_02/style_02_security_studies.yaml | 4 ---- .../style_02/style_02_sociology.yaml | 4 ---- .../style_02/style_02_us_foreign_policy.yaml | 4 ---- .../style_02/style_02_virology.yaml | 4 ---- .../style_02/style_02_world_religions.yaml | 4 ---- .../full_continuation/style_03/_template_yaml | 13 ------------- .../style_03/style_03_abstract_algebra.yaml | 4 ---- .../style_03/style_03_anatomy.yaml | 4 ---- .../style_03/style_03_astronomy.yaml | 4 ---- .../style_03/style_03_business_ethics.yaml | 4 ---- .../style_03/style_03_clinical_knowledge.yaml | 4 ---- 
.../style_03/style_03_college_biology.yaml | 4 ---- .../style_03/style_03_college_chemistry.yaml | 4 ---- .../style_03/style_03_college_computer_science.yaml | 4 ---- .../style_03/style_03_college_mathematics.yaml | 4 ---- .../style_03/style_03_college_medicine.yaml | 4 ---- .../style_03/style_03_college_physics.yaml | 4 ---- .../style_03/style_03_computer_security.yaml | 4 ---- .../style_03/style_03_conceptual_physics.yaml | 4 ---- .../style_03/style_03_econometrics.yaml | 4 ---- .../style_03/style_03_electrical_engineering.yaml | 4 ---- .../style_03/style_03_elementary_mathematics.yaml | 4 ---- .../style_03/style_03_formal_logic.yaml | 4 ---- .../style_03/style_03_global_facts.yaml | 4 ---- .../style_03/style_03_high_school_biology.yaml | 4 ---- .../style_03/style_03_high_school_chemistry.yaml | 4 ---- .../style_03_high_school_computer_science.yaml | 4 ---- .../style_03_high_school_european_history.yaml | 4 ---- .../style_03/style_03_high_school_geography.yaml | 4 ---- ...tyle_03_high_school_government_and_politics.yaml | 4 ---- .../style_03_high_school_macroeconomics.yaml | 4 ---- .../style_03/style_03_high_school_mathematics.yaml | 4 ---- .../style_03_high_school_microeconomics.yaml | 4 ---- .../style_03/style_03_high_school_physics.yaml | 4 ---- .../style_03/style_03_high_school_psychology.yaml | 4 ---- .../style_03/style_03_high_school_statistics.yaml | 4 ---- .../style_03/style_03_high_school_us_history.yaml | 4 ---- .../style_03_high_school_world_history.yaml | 4 ---- .../style_03/style_03_human_aging.yaml | 4 ---- .../style_03/style_03_human_sexuality.yaml | 4 ---- .../style_03/style_03_international_law.yaml | 4 ---- .../style_03/style_03_jurisprudence.yaml | 4 ---- .../style_03/style_03_logical_fallacies.yaml | 4 ---- .../style_03/style_03_machine_learning.yaml | 4 ---- .../style_03/style_03_management.yaml | 4 ---- .../style_03/style_03_marketing.yaml | 4 ---- .../style_03/style_03_medical_genetics.yaml | 4 ---- .../style_03/style_03_miscellaneous.yaml 
| 4 ---- .../style_03/style_03_moral_disputes.yaml | 4 ---- .../style_03/style_03_moral_scenarios.yaml | 4 ---- .../style_03/style_03_nutrition.yaml | 4 ---- .../style_03/style_03_philosophy.yaml | 4 ---- .../style_03/style_03_prehistory.yaml | 4 ---- .../style_03/style_03_professional_accounting.yaml | 4 ---- .../style_03/style_03_professional_law.yaml | 4 ---- .../style_03/style_03_professional_medicine.yaml | 4 ---- .../style_03/style_03_professional_psychology.yaml | 4 ---- .../style_03/style_03_public_relations.yaml | 4 ---- .../style_03/style_03_security_studies.yaml | 4 ---- .../style_03/style_03_sociology.yaml | 4 ---- .../style_03/style_03_us_foreign_policy.yaml | 4 ---- .../style_03/style_03_virology.yaml | 4 ---- .../style_03/style_03_world_religions.yaml | 4 ---- .../full_continuation/style_04/_template_yaml | 13 ------------- .../style_04/style_04_abstract_algebra.yaml | 4 ---- .../style_04/style_04_anatomy.yaml | 4 ---- .../style_04/style_04_astronomy.yaml | 4 ---- .../style_04/style_04_business_ethics.yaml | 4 ---- .../style_04/style_04_clinical_knowledge.yaml | 4 ---- .../style_04/style_04_college_biology.yaml | 4 ---- .../style_04/style_04_college_chemistry.yaml | 4 ---- .../style_04/style_04_college_computer_science.yaml | 4 ---- .../style_04/style_04_college_mathematics.yaml | 4 ---- .../style_04/style_04_college_medicine.yaml | 4 ---- .../style_04/style_04_college_physics.yaml | 4 ---- .../style_04/style_04_computer_security.yaml | 4 ---- .../style_04/style_04_conceptual_physics.yaml | 4 ---- .../style_04/style_04_econometrics.yaml | 4 ---- .../style_04/style_04_electrical_engineering.yaml | 4 ---- .../style_04/style_04_elementary_mathematics.yaml | 4 ---- .../style_04/style_04_formal_logic.yaml | 4 ---- .../style_04/style_04_global_facts.yaml | 4 ---- .../style_04/style_04_high_school_biology.yaml | 4 ---- .../style_04/style_04_high_school_chemistry.yaml | 4 ---- .../style_04_high_school_computer_science.yaml | 4 ---- 
.../style_04_high_school_european_history.yaml | 4 ---- .../style_04/style_04_high_school_geography.yaml | 4 ---- ...tyle_04_high_school_government_and_politics.yaml | 4 ---- .../style_04_high_school_macroeconomics.yaml | 4 ---- .../style_04/style_04_high_school_mathematics.yaml | 4 ---- .../style_04_high_school_microeconomics.yaml | 4 ---- .../style_04/style_04_high_school_physics.yaml | 4 ---- .../style_04/style_04_high_school_psychology.yaml | 4 ---- .../style_04/style_04_high_school_statistics.yaml | 4 ---- .../style_04/style_04_high_school_us_history.yaml | 4 ---- .../style_04_high_school_world_history.yaml | 4 ---- .../style_04/style_04_human_aging.yaml | 4 ---- .../style_04/style_04_human_sexuality.yaml | 4 ---- .../style_04/style_04_international_law.yaml | 4 ---- .../style_04/style_04_jurisprudence.yaml | 4 ---- .../style_04/style_04_logical_fallacies.yaml | 4 ---- .../style_04/style_04_machine_learning.yaml | 4 ---- .../style_04/style_04_management.yaml | 4 ---- .../style_04/style_04_marketing.yaml | 4 ---- .../style_04/style_04_medical_genetics.yaml | 4 ---- .../style_04/style_04_miscellaneous.yaml | 4 ---- .../style_04/style_04_moral_disputes.yaml | 4 ---- .../style_04/style_04_moral_scenarios.yaml | 4 ---- .../style_04/style_04_nutrition.yaml | 4 ---- .../style_04/style_04_philosophy.yaml | 4 ---- .../style_04/style_04_prehistory.yaml | 4 ---- .../style_04/style_04_professional_accounting.yaml | 4 ---- .../style_04/style_04_professional_law.yaml | 4 ---- .../style_04/style_04_professional_medicine.yaml | 4 ---- .../style_04/style_04_professional_psychology.yaml | 4 ---- .../style_04/style_04_public_relations.yaml | 4 ---- .../style_04/style_04_security_studies.yaml | 4 ---- .../style_04/style_04_sociology.yaml | 4 ---- .../style_04/style_04_us_foreign_policy.yaml | 4 ---- .../style_04/style_04_virology.yaml | 4 ---- .../style_04/style_04_world_religions.yaml | 4 ---- .../full_continuation/style_05/_template_yaml | 13 ------------- 
.../style_05/style_05_abstract_algebra.yaml | 4 ---- .../style_05/style_05_anatomy.yaml | 4 ---- .../style_05/style_05_astronomy.yaml | 4 ---- .../style_05/style_05_business_ethics.yaml | 4 ---- .../style_05/style_05_clinical_knowledge.yaml | 4 ---- .../style_05/style_05_college_biology.yaml | 4 ---- .../style_05/style_05_college_chemistry.yaml | 4 ---- .../style_05/style_05_college_computer_science.yaml | 4 ---- .../style_05/style_05_college_mathematics.yaml | 4 ---- .../style_05/style_05_college_medicine.yaml | 4 ---- .../style_05/style_05_college_physics.yaml | 4 ---- .../style_05/style_05_computer_security.yaml | 4 ---- .../style_05/style_05_conceptual_physics.yaml | 4 ---- .../style_05/style_05_econometrics.yaml | 4 ---- .../style_05/style_05_electrical_engineering.yaml | 4 ---- .../style_05/style_05_elementary_mathematics.yaml | 4 ---- .../style_05/style_05_formal_logic.yaml | 4 ---- .../style_05/style_05_global_facts.yaml | 4 ---- .../style_05/style_05_high_school_biology.yaml | 4 ---- .../style_05/style_05_high_school_chemistry.yaml | 4 ---- .../style_05_high_school_computer_science.yaml | 4 ---- .../style_05_high_school_european_history.yaml | 4 ---- .../style_05/style_05_high_school_geography.yaml | 4 ---- ...tyle_05_high_school_government_and_politics.yaml | 4 ---- .../style_05_high_school_macroeconomics.yaml | 4 ---- .../style_05/style_05_high_school_mathematics.yaml | 4 ---- .../style_05_high_school_microeconomics.yaml | 4 ---- .../style_05/style_05_high_school_physics.yaml | 4 ---- .../style_05/style_05_high_school_psychology.yaml | 4 ---- .../style_05/style_05_high_school_statistics.yaml | 4 ---- .../style_05/style_05_high_school_us_history.yaml | 4 ---- .../style_05_high_school_world_history.yaml | 4 ---- .../style_05/style_05_human_aging.yaml | 4 ---- .../style_05/style_05_human_sexuality.yaml | 4 ---- .../style_05/style_05_international_law.yaml | 4 ---- .../style_05/style_05_jurisprudence.yaml | 4 ---- .../style_05/style_05_logical_fallacies.yaml 
| 4 ---- .../style_05/style_05_machine_learning.yaml | 4 ---- .../style_05/style_05_management.yaml | 4 ---- .../style_05/style_05_marketing.yaml | 4 ---- .../style_05/style_05_medical_genetics.yaml | 4 ---- .../style_05/style_05_miscellaneous.yaml | 4 ---- .../style_05/style_05_moral_disputes.yaml | 4 ---- .../style_05/style_05_moral_scenarios.yaml | 4 ---- .../style_05/style_05_nutrition.yaml | 4 ---- .../style_05/style_05_philosophy.yaml | 4 ---- .../style_05/style_05_prehistory.yaml | 4 ---- .../style_05/style_05_professional_accounting.yaml | 4 ---- .../style_05/style_05_professional_law.yaml | 4 ---- .../style_05/style_05_professional_medicine.yaml | 4 ---- .../style_05/style_05_professional_psychology.yaml | 4 ---- .../style_05/style_05_public_relations.yaml | 4 ---- .../style_05/style_05_security_studies.yaml | 4 ---- .../style_05/style_05_sociology.yaml | 4 ---- .../style_05/style_05_us_foreign_policy.yaml | 4 ---- .../style_05/style_05_virology.yaml | 4 ---- .../style_05/style_05_world_religions.yaml | 4 ---- .../letters_only/style_01/_template_yaml | 13 ------------- .../style_01/style_01_abstract_algebra.yaml | 4 ---- .../letters_only/style_01/style_01_anatomy.yaml | 4 ---- .../letters_only/style_01/style_01_astronomy.yaml | 4 ---- .../style_01/style_01_business_ethics.yaml | 4 ---- .../style_01/style_01_clinical_knowledge.yaml | 4 ---- .../style_01/style_01_college_biology.yaml | 4 ---- .../style_01/style_01_college_chemistry.yaml | 4 ---- .../style_01/style_01_college_computer_science.yaml | 4 ---- .../style_01/style_01_college_mathematics.yaml | 4 ---- .../style_01/style_01_college_medicine.yaml | 4 ---- .../style_01/style_01_college_physics.yaml | 4 ---- .../style_01/style_01_computer_security.yaml | 4 ---- .../style_01/style_01_conceptual_physics.yaml | 4 ---- .../style_01/style_01_econometrics.yaml | 4 ---- .../style_01/style_01_electrical_engineering.yaml | 4 ---- .../style_01/style_01_elementary_mathematics.yaml | 4 ---- 
.../style_01/style_01_formal_logic.yaml | 4 ---- .../style_01/style_01_global_facts.yaml | 4 ---- .../style_01/style_01_high_school_biology.yaml | 4 ---- .../style_01/style_01_high_school_chemistry.yaml | 4 ---- .../style_01_high_school_computer_science.yaml | 4 ---- .../style_01_high_school_european_history.yaml | 4 ---- .../style_01/style_01_high_school_geography.yaml | 4 ---- ...tyle_01_high_school_government_and_politics.yaml | 4 ---- .../style_01_high_school_macroeconomics.yaml | 4 ---- .../style_01/style_01_high_school_mathematics.yaml | 4 ---- .../style_01_high_school_microeconomics.yaml | 4 ---- .../style_01/style_01_high_school_physics.yaml | 4 ---- .../style_01/style_01_high_school_psychology.yaml | 4 ---- .../style_01/style_01_high_school_statistics.yaml | 4 ---- .../style_01/style_01_high_school_us_history.yaml | 4 ---- .../style_01_high_school_world_history.yaml | 4 ---- .../letters_only/style_01/style_01_human_aging.yaml | 4 ---- .../style_01/style_01_human_sexuality.yaml | 4 ---- .../style_01/style_01_international_law.yaml | 4 ---- .../style_01/style_01_jurisprudence.yaml | 4 ---- .../style_01/style_01_logical_fallacies.yaml | 4 ---- .../style_01/style_01_machine_learning.yaml | 4 ---- .../letters_only/style_01/style_01_management.yaml | 4 ---- .../letters_only/style_01/style_01_marketing.yaml | 4 ---- .../style_01/style_01_medical_genetics.yaml | 4 ---- .../style_01/style_01_miscellaneous.yaml | 4 ---- .../style_01/style_01_moral_disputes.yaml | 4 ---- .../style_01/style_01_moral_scenarios.yaml | 4 ---- .../letters_only/style_01/style_01_nutrition.yaml | 4 ---- .../letters_only/style_01/style_01_philosophy.yaml | 4 ---- .../letters_only/style_01/style_01_prehistory.yaml | 4 ---- .../style_01/style_01_professional_accounting.yaml | 4 ---- .../style_01/style_01_professional_law.yaml | 4 ---- .../style_01/style_01_professional_medicine.yaml | 4 ---- .../style_01/style_01_professional_psychology.yaml | 4 ---- .../style_01/style_01_public_relations.yaml 
| 4 ---- .../style_01/style_01_security_studies.yaml | 4 ---- .../letters_only/style_01/style_01_sociology.yaml | 4 ---- .../style_01/style_01_us_foreign_policy.yaml | 4 ---- .../letters_only/style_01/style_01_virology.yaml | 4 ---- .../style_01/style_01_world_religions.yaml | 4 ---- .../letters_only/style_02/_template_yaml | 13 ------------- .../style_02/style_02_abstract_algebra.yaml | 4 ---- .../letters_only/style_02/style_02_anatomy.yaml | 4 ---- .../letters_only/style_02/style_02_astronomy.yaml | 4 ---- .../style_02/style_02_business_ethics.yaml | 4 ---- .../style_02/style_02_clinical_knowledge.yaml | 4 ---- .../style_02/style_02_college_biology.yaml | 4 ---- .../style_02/style_02_college_chemistry.yaml | 4 ---- .../style_02/style_02_college_computer_science.yaml | 4 ---- .../style_02/style_02_college_mathematics.yaml | 4 ---- .../style_02/style_02_college_medicine.yaml | 4 ---- .../style_02/style_02_college_physics.yaml | 4 ---- .../style_02/style_02_computer_security.yaml | 4 ---- .../style_02/style_02_conceptual_physics.yaml | 4 ---- .../style_02/style_02_econometrics.yaml | 4 ---- .../style_02/style_02_electrical_engineering.yaml | 4 ---- .../style_02/style_02_elementary_mathematics.yaml | 4 ---- .../style_02/style_02_formal_logic.yaml | 4 ---- .../style_02/style_02_global_facts.yaml | 4 ---- .../style_02/style_02_high_school_biology.yaml | 4 ---- .../style_02/style_02_high_school_chemistry.yaml | 4 ---- .../style_02_high_school_computer_science.yaml | 4 ---- .../style_02_high_school_european_history.yaml | 4 ---- .../style_02/style_02_high_school_geography.yaml | 4 ---- ...tyle_02_high_school_government_and_politics.yaml | 4 ---- .../style_02_high_school_macroeconomics.yaml | 4 ---- .../style_02/style_02_high_school_mathematics.yaml | 4 ---- .../style_02_high_school_microeconomics.yaml | 4 ---- .../style_02/style_02_high_school_physics.yaml | 4 ---- .../style_02/style_02_high_school_psychology.yaml | 4 ---- 
.../style_02/style_02_high_school_statistics.yaml | 4 ---- .../style_02/style_02_high_school_us_history.yaml | 4 ---- .../style_02_high_school_world_history.yaml | 4 ---- .../letters_only/style_02/style_02_human_aging.yaml | 4 ---- .../style_02/style_02_human_sexuality.yaml | 4 ---- .../style_02/style_02_international_law.yaml | 4 ---- .../style_02/style_02_jurisprudence.yaml | 4 ---- .../style_02/style_02_logical_fallacies.yaml | 4 ---- .../style_02/style_02_machine_learning.yaml | 4 ---- .../letters_only/style_02/style_02_management.yaml | 4 ---- .../letters_only/style_02/style_02_marketing.yaml | 4 ---- .../style_02/style_02_medical_genetics.yaml | 4 ---- .../style_02/style_02_miscellaneous.yaml | 4 ---- .../style_02/style_02_moral_disputes.yaml | 4 ---- .../style_02/style_02_moral_scenarios.yaml | 4 ---- .../letters_only/style_02/style_02_nutrition.yaml | 4 ---- .../letters_only/style_02/style_02_philosophy.yaml | 4 ---- .../letters_only/style_02/style_02_prehistory.yaml | 4 ---- .../style_02/style_02_professional_accounting.yaml | 4 ---- .../style_02/style_02_professional_law.yaml | 4 ---- .../style_02/style_02_professional_medicine.yaml | 4 ---- .../style_02/style_02_professional_psychology.yaml | 4 ---- .../style_02/style_02_public_relations.yaml | 4 ---- .../style_02/style_02_security_studies.yaml | 4 ---- .../letters_only/style_02/style_02_sociology.yaml | 4 ---- .../style_02/style_02_us_foreign_policy.yaml | 4 ---- .../letters_only/style_02/style_02_virology.yaml | 4 ---- .../style_02/style_02_world_religions.yaml | 4 ---- .../letters_only/style_03/_template_yaml | 13 ------------- .../style_03/style_03_abstract_algebra.yaml | 4 ---- .../letters_only/style_03/style_03_anatomy.yaml | 4 ---- .../letters_only/style_03/style_03_astronomy.yaml | 4 ---- .../style_03/style_03_business_ethics.yaml | 4 ---- .../style_03/style_03_clinical_knowledge.yaml | 4 ---- .../style_03/style_03_college_biology.yaml | 4 ---- .../style_03/style_03_college_chemistry.yaml | 4 
---- .../style_03/style_03_college_computer_science.yaml | 4 ---- .../style_03/style_03_college_mathematics.yaml | 4 ---- .../style_03/style_03_college_medicine.yaml | 4 ---- .../style_03/style_03_college_physics.yaml | 4 ---- .../style_03/style_03_computer_security.yaml | 4 ---- .../style_03/style_03_conceptual_physics.yaml | 4 ---- .../style_03/style_03_econometrics.yaml | 4 ---- .../style_03/style_03_electrical_engineering.yaml | 4 ---- .../style_03/style_03_elementary_mathematics.yaml | 4 ---- .../style_03/style_03_formal_logic.yaml | 4 ---- .../style_03/style_03_global_facts.yaml | 4 ---- .../style_03/style_03_high_school_biology.yaml | 4 ---- .../style_03/style_03_high_school_chemistry.yaml | 4 ---- .../style_03_high_school_computer_science.yaml | 4 ---- .../style_03_high_school_european_history.yaml | 4 ---- .../style_03/style_03_high_school_geography.yaml | 4 ---- ...tyle_03_high_school_government_and_politics.yaml | 4 ---- .../style_03_high_school_macroeconomics.yaml | 4 ---- .../style_03/style_03_high_school_mathematics.yaml | 4 ---- .../style_03_high_school_microeconomics.yaml | 4 ---- .../style_03/style_03_high_school_physics.yaml | 4 ---- .../style_03/style_03_high_school_psychology.yaml | 4 ---- .../style_03/style_03_high_school_statistics.yaml | 4 ---- .../style_03/style_03_high_school_us_history.yaml | 4 ---- .../style_03_high_school_world_history.yaml | 4 ---- .../letters_only/style_03/style_03_human_aging.yaml | 4 ---- .../style_03/style_03_human_sexuality.yaml | 4 ---- .../style_03/style_03_international_law.yaml | 4 ---- .../style_03/style_03_jurisprudence.yaml | 4 ---- .../style_03/style_03_logical_fallacies.yaml | 4 ---- .../style_03/style_03_machine_learning.yaml | 4 ---- .../letters_only/style_03/style_03_management.yaml | 4 ---- .../letters_only/style_03/style_03_marketing.yaml | 4 ---- .../style_03/style_03_medical_genetics.yaml | 4 ---- .../style_03/style_03_miscellaneous.yaml | 4 ---- .../style_03/style_03_moral_disputes.yaml | 4 ---- 
.../style_03/style_03_moral_scenarios.yaml | 4 ---- .../letters_only/style_03/style_03_nutrition.yaml | 4 ---- .../letters_only/style_03/style_03_philosophy.yaml | 4 ---- .../letters_only/style_03/style_03_prehistory.yaml | 4 ---- .../style_03/style_03_professional_accounting.yaml | 4 ---- .../style_03/style_03_professional_law.yaml | 4 ---- .../style_03/style_03_professional_medicine.yaml | 4 ---- .../style_03/style_03_professional_psychology.yaml | 4 ---- .../style_03/style_03_public_relations.yaml | 4 ---- .../style_03/style_03_security_studies.yaml | 4 ---- .../letters_only/style_03/style_03_sociology.yaml | 4 ---- .../style_03/style_03_us_foreign_policy.yaml | 4 ---- .../letters_only/style_03/style_03_virology.yaml | 4 ---- .../style_03/style_03_world_religions.yaml | 4 ---- .../letters_only/style_04/_template_yaml | 13 ------------- .../style_04/style_04_abstract_algebra.yaml | 4 ---- .../letters_only/style_04/style_04_anatomy.yaml | 4 ---- .../letters_only/style_04/style_04_astronomy.yaml | 4 ---- .../style_04/style_04_business_ethics.yaml | 4 ---- .../style_04/style_04_clinical_knowledge.yaml | 4 ---- .../style_04/style_04_college_biology.yaml | 4 ---- .../style_04/style_04_college_chemistry.yaml | 4 ---- .../style_04/style_04_college_computer_science.yaml | 4 ---- .../style_04/style_04_college_mathematics.yaml | 4 ---- .../style_04/style_04_college_medicine.yaml | 4 ---- .../style_04/style_04_college_physics.yaml | 4 ---- .../style_04/style_04_computer_security.yaml | 4 ---- .../style_04/style_04_conceptual_physics.yaml | 4 ---- .../style_04/style_04_econometrics.yaml | 4 ---- .../style_04/style_04_electrical_engineering.yaml | 4 ---- .../style_04/style_04_elementary_mathematics.yaml | 4 ---- .../style_04/style_04_formal_logic.yaml | 4 ---- .../style_04/style_04_global_facts.yaml | 4 ---- .../style_04/style_04_high_school_biology.yaml | 4 ---- .../style_04/style_04_high_school_chemistry.yaml | 4 ---- .../style_04_high_school_computer_science.yaml | 4 ---- 
.../style_04_high_school_european_history.yaml | 4 ---- .../style_04/style_04_high_school_geography.yaml | 4 ---- ...tyle_04_high_school_government_and_politics.yaml | 4 ---- .../style_04_high_school_macroeconomics.yaml | 4 ---- .../style_04/style_04_high_school_mathematics.yaml | 4 ---- .../style_04_high_school_microeconomics.yaml | 4 ---- .../style_04/style_04_high_school_physics.yaml | 4 ---- .../style_04/style_04_high_school_psychology.yaml | 4 ---- .../style_04/style_04_high_school_statistics.yaml | 4 ---- .../style_04/style_04_high_school_us_history.yaml | 4 ---- .../style_04_high_school_world_history.yaml | 4 ---- .../letters_only/style_04/style_04_human_aging.yaml | 4 ---- .../style_04/style_04_human_sexuality.yaml | 4 ---- .../style_04/style_04_international_law.yaml | 4 ---- .../style_04/style_04_jurisprudence.yaml | 4 ---- .../style_04/style_04_logical_fallacies.yaml | 4 ---- .../style_04/style_04_machine_learning.yaml | 4 ---- .../letters_only/style_04/style_04_management.yaml | 4 ---- .../letters_only/style_04/style_04_marketing.yaml | 4 ---- .../style_04/style_04_medical_genetics.yaml | 4 ---- .../style_04/style_04_miscellaneous.yaml | 4 ---- .../style_04/style_04_moral_disputes.yaml | 4 ---- .../style_04/style_04_moral_scenarios.yaml | 4 ---- .../letters_only/style_04/style_04_nutrition.yaml | 4 ---- .../letters_only/style_04/style_04_philosophy.yaml | 4 ---- .../letters_only/style_04/style_04_prehistory.yaml | 4 ---- .../style_04/style_04_professional_accounting.yaml | 4 ---- .../style_04/style_04_professional_law.yaml | 4 ---- .../style_04/style_04_professional_medicine.yaml | 4 ---- .../style_04/style_04_professional_psychology.yaml | 4 ---- .../style_04/style_04_public_relations.yaml | 4 ---- .../style_04/style_04_security_studies.yaml | 4 ---- .../letters_only/style_04/style_04_sociology.yaml | 4 ---- .../style_04/style_04_us_foreign_policy.yaml | 4 ---- .../letters_only/style_04/style_04_virology.yaml | 4 ---- 
.../style_04/style_04_world_religions.yaml | 4 ---- .../letters_only/style_05/_template_yaml | 13 ------------- .../style_05/style_05_abstract_algebra.yaml | 4 ---- .../letters_only/style_05/style_05_anatomy.yaml | 4 ---- .../letters_only/style_05/style_05_astronomy.yaml | 4 ---- .../style_05/style_05_business_ethics.yaml | 4 ---- .../style_05/style_05_clinical_knowledge.yaml | 4 ---- .../style_05/style_05_college_biology.yaml | 4 ---- .../style_05/style_05_college_chemistry.yaml | 4 ---- .../style_05/style_05_college_computer_science.yaml | 4 ---- .../style_05/style_05_college_mathematics.yaml | 4 ---- .../style_05/style_05_college_medicine.yaml | 4 ---- .../style_05/style_05_college_physics.yaml | 4 ---- .../style_05/style_05_computer_security.yaml | 4 ---- .../style_05/style_05_conceptual_physics.yaml | 4 ---- .../style_05/style_05_econometrics.yaml | 4 ---- .../style_05/style_05_electrical_engineering.yaml | 4 ---- .../style_05/style_05_elementary_mathematics.yaml | 4 ---- .../style_05/style_05_formal_logic.yaml | 4 ---- .../style_05/style_05_global_facts.yaml | 4 ---- .../style_05/style_05_high_school_biology.yaml | 4 ---- .../style_05/style_05_high_school_chemistry.yaml | 4 ---- .../style_05_high_school_computer_science.yaml | 4 ---- .../style_05_high_school_european_history.yaml | 4 ---- .../style_05/style_05_high_school_geography.yaml | 4 ---- ...tyle_05_high_school_government_and_politics.yaml | 4 ---- .../style_05_high_school_macroeconomics.yaml | 4 ---- .../style_05/style_05_high_school_mathematics.yaml | 4 ---- .../style_05_high_school_microeconomics.yaml | 4 ---- .../style_05/style_05_high_school_physics.yaml | 4 ---- .../style_05/style_05_high_school_psychology.yaml | 4 ---- .../style_05/style_05_high_school_statistics.yaml | 4 ---- .../style_05/style_05_high_school_us_history.yaml | 4 ---- .../style_05_high_school_world_history.yaml | 4 ---- .../letters_only/style_05/style_05_human_aging.yaml | 4 ---- .../style_05/style_05_human_sexuality.yaml | 4 
---- .../style_05/style_05_international_law.yaml | 4 ---- .../style_05/style_05_jurisprudence.yaml | 4 ---- .../style_05/style_05_logical_fallacies.yaml | 4 ---- .../style_05/style_05_machine_learning.yaml | 4 ---- .../letters_only/style_05/style_05_management.yaml | 4 ---- .../letters_only/style_05/style_05_marketing.yaml | 4 ---- .../style_05/style_05_medical_genetics.yaml | 4 ---- .../style_05/style_05_miscellaneous.yaml | 4 ---- .../style_05/style_05_moral_disputes.yaml | 4 ---- .../style_05/style_05_moral_scenarios.yaml | 4 ---- .../letters_only/style_05/style_05_nutrition.yaml | 4 ---- .../letters_only/style_05/style_05_philosophy.yaml | 4 ---- .../letters_only/style_05/style_05_prehistory.yaml | 4 ---- .../style_05/style_05_professional_accounting.yaml | 4 ---- .../style_05/style_05_professional_law.yaml | 4 ---- .../style_05/style_05_professional_medicine.yaml | 4 ---- .../style_05/style_05_professional_psychology.yaml | 4 ---- .../style_05/style_05_public_relations.yaml | 4 ---- .../style_05/style_05_security_studies.yaml | 4 ---- .../letters_only/style_05/style_05_sociology.yaml | 4 ---- .../style_05/style_05_us_foreign_policy.yaml | 4 ---- .../letters_only/style_05/style_05_virology.yaml | 4 ---- .../style_05/style_05_world_religions.yaml | 4 ---- 580 files changed, 2410 deletions(-) delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml delete mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml 
delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml delete mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml 
delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml deleted file mode 100644 index a42ebf3d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_01_fc -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}}\n(B) {{choices[1]}}\n(C) {{choices[2]}}\n(D) {{choices[3]}}\nA: " -doc_to_choice: "{{['(A) '+choices[0], '(B) '+choices[1], '(C) '+choices[2], '(D) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml deleted file mode 100644 index 6a6a8643..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract 
algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml deleted file mode 100644 index f0a6d17b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml deleted file mode 100644 index cf182ebb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml deleted file mode 100644 index 54f8af54..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml deleted file mode 100644 index 8f49c1db..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml deleted file mode 100644 index bf6d1c4d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml deleted file mode 100644 index 5e34ce01..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml deleted file mode 100644 index cdf093a1..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml deleted file mode 100644 index 23c08a30..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml deleted file mode 100644 index 478aa690..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml deleted file mode 100644 index ae450418..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml deleted file mode 100644 index 6242db7d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml deleted file mode 100644 index 1d0e623a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml deleted file mode 100644 index 92c0cf93..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_01_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml deleted file mode 100644 index 0fcce024..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml deleted file mode 100644 index 00e3b422..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml deleted file mode 100644 index 6ee8b4bf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml deleted file mode 100644 index aa601e97..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml deleted file mode 100644 index 705b4eba..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml deleted file mode 100644 index 755abc3b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml deleted file mode 100644 index 
a93522e8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml deleted file mode 100644 index 85779283..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml deleted file mode 100644 index 13b47bc0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml deleted file mode 100644 index 
c4d0baa6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml deleted file mode 100644 index b2b1b2e9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml deleted file mode 100644 index 95257104..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml deleted file mode 
100644 index 13b10862..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml deleted file mode 100644 index 3adfaf4d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml deleted file mode 100644 index 6e8f2841..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml deleted file mode 100644 index fa1d1033..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml deleted file mode 100644 index 71d33cc3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml deleted file mode 100644 index 0c1373f0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml deleted file mode 100644 index 2049c970..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml deleted file mode 100644 index cfe7e9a4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml deleted file mode 100644 index e24395bf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml deleted file mode 100644 index 4aeac5f7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice 
questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml deleted file mode 100644 index d0299c7e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml deleted file mode 100644 index 2dd00d57..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml deleted file mode 100644 index 85db5f7e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_management" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml deleted file mode 100644 index e149454a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml deleted file mode 100644 index 030c4245..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml deleted file mode 100644 index 5ac6c654..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml deleted file mode 100644 index 89dd4970..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml deleted file mode 100644 index 643a3dac..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml deleted file mode 100644 index e7c52ba9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml deleted file mode 100644 index 0b669ded..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" 
-"include": "_template_yaml" -"task": "mmlu_style_01_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml deleted file mode 100644 index baed2437..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml deleted file mode 100644 index 86cfa62c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml deleted file mode 100644 index 415383d1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml deleted file mode 100644 index 370881b1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml deleted file mode 100644 index 83b0fb51..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml deleted file mode 100644 index f812fb1d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml deleted file mode 
100644 index aed4580f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml deleted file mode 100644 index ec71095a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml deleted file mode 100644 index 894433eb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml deleted file mode 100644 index abadbf46..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice 
questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml deleted file mode 100644 index fec1a23d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_01/style_01_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml deleted file mode 100644 index 5ba05e51..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_02_fc -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nA: " -doc_to_choice: "{{['A. '+choices[0], 'B. '+choices[1], 'C. '+choices[2], 'D. 
'+choices[3]]}}" -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml deleted file mode 100644 index 993f4ed4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml deleted file mode 100644 index 154c8bde..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml deleted file mode 100644 index 80b5f529..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml deleted file mode 100644 index d401a727..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml deleted file mode 100644 index 88062d80..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml deleted file mode 100644 index 2935671f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml deleted file mode 100644 index 96ca75d7..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml deleted file mode 100644 index 1b77265b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml deleted file mode 100644 index c2264362..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml deleted file mode 100644 index febcf8cc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_medicine.yaml +++ /dev/null @@ 
-1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml deleted file mode 100644 index 4685a383..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml deleted file mode 100644 index b70bd16b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml deleted file mode 100644 index f8fd546c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": 
"_template_yaml" -"task": "mmlu_style_02_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml deleted file mode 100644 index ce61dd15..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml deleted file mode 100644 index c2b49225..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml deleted file mode 100644 index 1d3f8ec6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml deleted file mode 100644 index 1a626cd3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml deleted file mode 100644 index d08193c9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml deleted file mode 100644 index 505a9c3c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml deleted 
file mode 100644 index 540e5c7c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml deleted file mode 100644 index 0e628287..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml deleted file mode 100644 index b202e393..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml deleted file mode 
100644 index 1ccfbc31..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml deleted file mode 100644 index 2a6fdcf1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml deleted file mode 100644 index c6b0d4f7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml deleted file mode 100644 index 17e7a054..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml deleted file mode 100644 index 53a36c3d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml deleted file mode 100644 index d65678c7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml deleted file mode 100644 index bb4940d9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml deleted file mode 100644 index ea8f90fc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml deleted file mode 100644 index 0a00e301..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml deleted file mode 100644 index d43c94bf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml deleted file mode 100644 index 82510fdb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml deleted file mode 100644 index 3351ff16..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml deleted file mode 100644 index c2e2d8ad..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml deleted file mode 100644 index a281191c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml deleted file mode 100644 index 248e2e38..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml deleted file mode 100644 index 990f6d0c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following 
are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml deleted file mode 100644 index 72f13b4a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml deleted file mode 100644 index 93d298d6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml deleted file mode 100644 index 45737778..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml deleted file mode 100644 index 5f570691..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml deleted file mode 100644 index f8388171..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml deleted file mode 100644 index b4dbfbcf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml deleted file mode 100644 index f0a1af26..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml deleted file mode 100644 index 4455fc2a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml deleted file mode 100644 index 4c6c0b9a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml deleted file mode 100644 index 4ae50925..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional 
accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml deleted file mode 100644 index ddf67b8b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml deleted file mode 100644 index b8a1e1c0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml deleted file mode 100644 index 343c3d35..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_professional_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml deleted file mode 100644 index 15cbcc5c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml deleted file mode 100644 index 2315f848..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml deleted file mode 100644 index 97fa859e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml deleted file mode 100644 index 
dd5510be..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml deleted file mode 100644 index 0ebdc549..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml deleted file mode 100644 index ab076ec5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_02/style_02_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml deleted file mode 100644 index 1f7f90cf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_03_fc -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: 
"Q: {{question.strip()}}\nA) {{choices[0]}}\nB) {{choices[1]}}\nC) {{choices[2]}}\nD) {{choices[3]}}\nA: " -doc_to_choice: "{{['A) '+choices[0], 'B) '+choices[1], 'C) '+choices[2], 'D) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml deleted file mode 100644 index 00454841..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml deleted file mode 100644 index 8fc0c2ec..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml deleted file mode 100644 index dc7d0971..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about 
astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml deleted file mode 100644 index e245bd91..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml deleted file mode 100644 index 4da57590..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml deleted file mode 100644 index 33d821b6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml deleted file mode 100644 index 246d5c0d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml deleted file mode 100644 index 393b6636..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml deleted file mode 100644 index 0636d0b5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml deleted file mode 100644 index 8c189982..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml deleted file mode 100644 index 9b0f07cb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml deleted file mode 100644 index 40f6a6c7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml deleted file mode 100644 index 0ffc95d9..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml deleted file mode 100644 index f538431f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml deleted file mode 100644 index 4c21bede..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml deleted file mode 100644 index 759dba03..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": 
"elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml deleted file mode 100644 index 2754379a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml deleted file mode 100644 index 997c431e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml deleted file mode 100644 index 626cfb14..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_biology" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml deleted file mode 100644 index 566bd7b5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml deleted file mode 100644 index 816448e2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml deleted file mode 100644 index 1bda17bb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_european_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml deleted file mode 100644 index cca80255..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml deleted file mode 100644 index 08dffd01..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml deleted file mode 100644 index 4a259293..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_03_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml deleted file mode 100644 index c09513e8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml deleted file mode 100644 index 653ad185..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml deleted file mode 100644 index 3232299f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml deleted file mode 100644 index 7ae5bfa0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml deleted file mode 100644 index 1788a7cb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml deleted file mode 100644 index 1cc70773..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_us_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml deleted file mode 100644 index 874dac59..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml deleted file mode 100644 index 6df374ce..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml deleted file mode 100644 index 1aac6d66..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml deleted file mode 100644 index 1493eef3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml deleted file mode 100644 index cc43e5f2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml deleted file mode 100644 index 69d01e84..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml deleted file mode 100644 index 241765c3..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml deleted file mode 100644 index 52b3d13c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml deleted file mode 100644 index 7ae05fde..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml deleted file mode 100644 index 526f5a3c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical 
genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml deleted file mode 100644 index 52d46c09..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml deleted file mode 100644 index 2eeba617..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml deleted file mode 100644 index 4ab76556..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml deleted file mode 100644 index 26318da3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml deleted file mode 100644 index d011f57f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml deleted file mode 100644 index 5ff8a000..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml deleted file mode 100644 index 9b19f66b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_accounting.yaml +++ /dev/null @@ -1,4 
+0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml deleted file mode 100644 index bb32235e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml deleted file mode 100644 index f64d2914..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml deleted file mode 100644 index c730d29b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions 
(with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml deleted file mode 100644 index fe784f69..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml deleted file mode 100644 index 5f7fc937..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml deleted file mode 100644 index 799ab788..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml deleted file mode 100644 index 81eb5ca6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml deleted file mode 100644 index dd64c766..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml deleted file mode 100644 index 86f83eeb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_03/style_03_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml deleted file mode 100644 index 86296725..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_04_fc -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\nA, {{choices[0]}}\nB, {{choices[1]}}\nC, {{choices[2]}}\nD, {{choices[3]}}\nA: " -doc_to_choice: "{{['A, '+choices[0], 'B, '+choices[1], 'C, '+choices[2], 'D, '+choices[3]]}}" # "{{choices}}" # ["(A)", "(B)", "(C)", "(D)"] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml deleted file mode 100644 index 9e98fe5b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml deleted file mode 100644 index 7f6d4fc6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml deleted file mode 100644 index 
a679f24a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml deleted file mode 100644 index 643b18f1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml deleted file mode 100644 index e0c53ae4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml deleted file mode 100644 index 210863b6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The 
following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml deleted file mode 100644 index 948977c0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml deleted file mode 100644 index e6a20393..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml deleted file mode 100644 index 1fa68f36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" 
-"task": "mmlu_style_04_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml deleted file mode 100644 index 0b557e91..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml deleted file mode 100644 index 9ea539e5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml deleted file mode 100644 index e8713f39..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml deleted file mode 100644 index b348ae36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml deleted file mode 100644 index af5812be..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml deleted file mode 100644 index 66a5e9d5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml deleted file mode 100644 index ac85b2e6..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml deleted file mode 100644 index 24de56a1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml deleted file mode 100644 index 1bf3a402..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml deleted file mode 100644 index 90a1142d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are 
multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml deleted file mode 100644 index 8a55d49b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml deleted file mode 100644 index bedf826d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml deleted file mode 100644 index 922b30b2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with 
answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml deleted file mode 100644 index c64045c5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml deleted file mode 100644 index 51e1f917..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml deleted file mode 100644 index 0e06b5df..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple 
choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml deleted file mode 100644 index 66c9ecb2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml deleted file mode 100644 index 641933b2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml deleted file mode 100644 index 30424296..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school 
physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml deleted file mode 100644 index 39b8eb36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml deleted file mode 100644 index 07c41317..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml deleted file mode 100644 index 6c6a10bb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_04_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml deleted file mode 100644 index a540f03d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml deleted file mode 100644 index 540daaae..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml deleted file mode 100644 index dc5407d2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml deleted file mode 100644 index fbc74f36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml deleted file mode 100644 index 522a5d45..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml deleted file mode 100644 index f7ce9484..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml deleted file mode 100644 index 38a6d060..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml deleted file mode 100644 index 90925876..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml deleted file mode 100644 index 85f6acd4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml deleted file mode 100644 index 6cad25b5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical 
genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml deleted file mode 100644 index fad34d46..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml deleted file mode 100644 index fdb07f53..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml deleted file mode 100644 index 21299b26..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml deleted file mode 100644 index 551af716..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml deleted file mode 100644 index 2b88a867..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml deleted file mode 100644 index 0a416e4c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml deleted file mode 100644 index d9e1cfcd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_accounting.yaml +++ /dev/null @@ -1,4 
+0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml deleted file mode 100644 index 9606f5a7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml deleted file mode 100644 index 16a9320e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml deleted file mode 100644 index 229b5200..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions 
(with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml deleted file mode 100644 index 161fac6f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml deleted file mode 100644 index 2489444d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml deleted file mode 100644 index ca7c5b9d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml deleted file mode 100644 index 538aaf90..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml deleted file mode 100644 index 3f7bad4d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml deleted file mode 100644 index 66cc040e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_04/style_04_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_fc_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml deleted file mode 100644 index e61f63e3..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_05_fc -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Answer the following question with fruit that represents the options: {{question.strip()}}\n(Apple) {{choices[0]}}\n(Banana) {{choices[1]}}\n(Cantaloupe) {{choices[2]}}\n(Durian) {{choices[3]}}\nAnswer: " -doc_to_choice: "{{['(Apple) '+choices[0], '(Banana) '+choices[1], '(Cantaloupe) '+choices[2], '(Durian) '+choices[3]]}}" # "{{choices}}" # ["A", "B", "C", "D"] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml deleted file mode 100644 index db455b3c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml deleted file mode 100644 index 86bbcc4d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml deleted file mode 100644 index 73201b0e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml deleted file mode 100644 index 3cbda755..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml deleted file mode 100644 index 28dbe325..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml deleted file mode 100644 index 56db146e..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml deleted file mode 100644 index aab517c9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml deleted file mode 100644 index 1e45d4d0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml deleted file mode 100644 index 3560688f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml deleted file mode 100644 index 49bce1bb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml deleted file mode 100644 index 8d3b96e2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml deleted file mode 100644 index 1d032b55..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_05_fc_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml deleted file mode 100644 index b40a3c3f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml deleted file mode 100644 index 19ee3f1b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml deleted file mode 100644 index 71c27923..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml deleted file mode 100644 index fbbde0da..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml deleted file mode 100644 index 98560803..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml deleted file mode 100644 index 5a99dcbe..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml deleted file mode 100644 index 19d0b64b..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml deleted file mode 100644 index fe593e1a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml deleted file mode 100644 index 4c0f2c9f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml deleted file mode 100644 index 367ee3cd..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml deleted file mode 100644 index 1003c063..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml deleted file mode 100644 index fb504698..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml deleted file mode 100644 
index 7f727304..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml deleted file mode 100644 index d7514f5e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml deleted file mode 100644 index 578e0936..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml deleted file mode 100644 index 290bf34e..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml deleted file mode 100644 index 6a0f84b7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml deleted file mode 100644 index 5f53106e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml deleted file mode 100644 index a021c530..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml deleted file mode 100644 index b035ff06..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml deleted file mode 100644 index dd1a98fb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml deleted file mode 100644 index 907fb5e4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml deleted file mode 100644 index 1b7762ab..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml deleted file mode 100644 index 867f5208..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml deleted file mode 100644 index b783548e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_05_fc_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml deleted file mode 100644 index 5fac78f3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml deleted file mode 100644 index 44d12096..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml deleted file mode 100644 index d1012c53..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml deleted file mode 
100644 index 97383250..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml deleted file mode 100644 index ddc49653..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml deleted file mode 100644 index 234a386c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml deleted file mode 100644 index 9a112318..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The 
following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml deleted file mode 100644 index 6198a8e7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml deleted file mode 100644 index e137e42f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml deleted file mode 100644 index fb7e0a83..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml deleted file mode 100644 index f2c5bdde..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml deleted file mode 100644 index 7d3877ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml deleted file mode 100644 index a97eabc3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml deleted 
file mode 100644 index 55a99fc6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml deleted file mode 100644 index 64c6762f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml deleted file mode 100644 index 8bd9f339..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml deleted file mode 100644 index e9416042..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml deleted file mode 100644 index c0bddee1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml deleted file mode 100644 index 2db007e4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml deleted file mode 100644 index fa939813..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/full_continuation/style_05/style_05_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_fc_world_religions" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml deleted file mode 100644 index 5d605a67..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_01_lo -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}}\n(B) {{choices[1]}}\n(C) {{choices[2]}}\n(D) {{choices[3]}}\nA: " -doc_to_choice: ['(A)', '(B)', '(C)', '(D)'] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml deleted file mode 100644 index 558c5e52..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml deleted file mode 100644 index b01ebedb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml deleted file mode 100644 index f8eb0b16..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml deleted file mode 100644 index 07f094ee..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml deleted file mode 100644 index 1523a6ee..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml deleted file mode 100644 index 7da6f830..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_biology.yaml +++ /dev/null @@ -1,4 
+0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml deleted file mode 100644 index 4cb4c646..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml deleted file mode 100644 index a4fc1770..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml deleted file mode 100644 index 781a93ea..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": 
"_template_yaml" -"task": "mmlu_style_01_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml deleted file mode 100644 index 26c72102..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml deleted file mode 100644 index 18c4b6c9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml deleted file mode 100644 index b93acb38..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml deleted file mode 100644 index d817b140..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml deleted file mode 100644 index 2a68646b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml deleted file mode 100644 index 31ad2fa6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml deleted file mode 100644 index 29ffa115..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml deleted file mode 100644 index 370b1018..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml deleted file mode 100644 index 45f504ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml deleted file mode 100644 index 5fd5912d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about 
high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml deleted file mode 100644 index 15d0c893..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml deleted file mode 100644 index f124095e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml deleted file mode 100644 index cbf30b84..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_01_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml deleted file mode 100644 index f6d7e3df..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml deleted file mode 100644 index 9816dbcd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml deleted file mode 100644 index 1b819d09..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_01_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml deleted file mode 100644 index 0a14903e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml deleted file mode 100644 index 5dca1e99..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml deleted file mode 100644 index 7b28660c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml deleted file mode 100644 index f8e66745..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml deleted file mode 100644 index c2bd8b8c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml deleted file mode 100644 index c7c3f7b3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml deleted file mode 100644 index e425eb37..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml deleted file mode 100644 index 010f23df..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml deleted file mode 100644 index 481026d7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml deleted file mode 100644 index 036b361a..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml deleted file mode 100644 index ccc6bf56..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml deleted file mode 100644 index a0e27f9f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml deleted file mode 100644 index 309b2089..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about 
machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml deleted file mode 100644 index 4927b36f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml deleted file mode 100644 index 28df2e3a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml deleted file mode 100644 index abd21d98..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml deleted file mode 
100644 index 770938dc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml deleted file mode 100644 index 9abe2973..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml deleted file mode 100644 index c97b0702..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml deleted file mode 100644 index a3cb37c0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about 
nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml deleted file mode 100644 index 362ac1b2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml deleted file mode 100644 index be6e7a5e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml deleted file mode 100644 index 75633be7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml deleted file mode 100644 index f09e5a3c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml deleted file mode 100644 index b78942f7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml deleted file mode 100644 index 93b289d6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml deleted file mode 100644 index 8a9cc49c..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml deleted file mode 100644 index c40f85dd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml deleted file mode 100644 index 2abcd7ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml deleted file mode 100644 index 87e3bd95..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" 
-"include": "_template_yaml" -"task": "mmlu_style_01_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml deleted file mode 100644 index cca3fddd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml deleted file mode 100644 index 147ec4a7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_01/style_01_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml deleted file mode 100644 index 9f444a5c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_02_lo -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nA: " -doc_to_choice: ['A.', 'B.', 'C.', 'D.'] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml deleted file mode 100644 index 7bbc0305..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml deleted file mode 100644 index 6ad5fc90..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml deleted file mode 100644 index 921bbae3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml deleted file mode 100644 index 30c61b85..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml deleted file mode 100644 index 0d828f8c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml deleted file mode 100644 index 77208f67..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml deleted file mode 100644 index 0b98e1c1..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml deleted file mode 100644 index 0c4fdbd5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml deleted file mode 100644 index 6db966a3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml deleted file mode 100644 index 0c75eb2b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" 
-"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml deleted file mode 100644 index e2207cf5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml deleted file mode 100644 index edc01a6b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml deleted file mode 100644 index 015681ae..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_conceptual_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml deleted file mode 100644 index c315cff4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml deleted file mode 100644 index fa370bbe..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml deleted file mode 100644 index f481e971..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml 
deleted file mode 100644 index b9d6a1ca..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml deleted file mode 100644 index a8990f95..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml deleted file mode 100644 index 8e16593f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml deleted file mode 100644 index 506127e5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The 
following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml deleted file mode 100644 index f5fa9c49..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml deleted file mode 100644 index 5d8a413c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml deleted file mode 100644 index 150affea..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) 
about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml deleted file mode 100644 index 4722d231..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml deleted file mode 100644 index d14d2db6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml deleted file mode 100644 index e1adf87b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school 
mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml deleted file mode 100644 index 5d994ad4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml deleted file mode 100644 index 5699e2be..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml deleted file mode 100644 index 814ae0a7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_psychology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml deleted file mode 100644 index bf2d2d38..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml deleted file mode 100644 index 8cefc8e3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml deleted file mode 100644 index 901b1386..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml deleted file mode 100644 index ef484740..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml deleted file mode 100644 index 567a0494..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml deleted file mode 100644 index e09cead2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml deleted file mode 100644 index 7643ef29..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 
@@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml deleted file mode 100644 index 6e8d9618..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml deleted file mode 100644 index aa324ad5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml deleted file mode 100644 index 75781818..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_management" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml deleted file mode 100644 index d1a2333a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml deleted file mode 100644 index 8c97c867..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml deleted file mode 100644 index 77dd129a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml deleted file mode 100644 index cbf95df9..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml deleted file mode 100644 index 941410b4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml deleted file mode 100644 index b49ca05c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml deleted file mode 100644 index 08c23643..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_02_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml deleted file mode 100644 index 8106aea7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml deleted file mode 100644 index 75ad6a33..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml deleted file mode 100644 index 3c8e4e8b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml deleted file mode 100644 index 37b2b3a0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml deleted file mode 100644 index 34af2faa..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml deleted file mode 100644 index 6d3ca3c5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml deleted file mode 100644 index 3ef58bb8..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml deleted file mode 100644 index 2bd3662d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml deleted file mode 100644 index b268c344..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml deleted file mode 100644 index 1a4a27e6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_02_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml deleted file mode 100644 index f262df0e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_02/style_02_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_02_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml deleted file mode 100644 index 0c9200d8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_03_lo -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\nA) {{choices[0]}}\nB) {{choices[1]}}\nC) {{choices[2]}}\nD) {{choices[3]}}\nA: " -doc_to_choice: ['A)', 'B)', 'C)', 'D)'] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml deleted file mode 100644 index 2a4eb20b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_abstract_algebra" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml deleted file mode 100644 index 81b9e739..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml deleted file mode 100644 index 7b8cd957..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml deleted file mode 100644 index 07e9dcf0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml deleted file mode 100644 index 5ff6bb05..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml deleted file mode 100644 index 69e449da..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml deleted file mode 100644 index 7821d3eb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml deleted file mode 100644 index c4523a31..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following 
are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml deleted file mode 100644 index 412ac6ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml deleted file mode 100644 index 7ac69116..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml deleted file mode 100644 index 756ff002..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_college_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml deleted file mode 100644 index c0ce5bb4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml deleted file mode 100644 index 49a67c57..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml deleted file mode 100644 index 6dab21fe..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml deleted file mode 100644 index 
1020b58b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml deleted file mode 100644 index 741d91ff..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml deleted file mode 100644 index b7ddf861..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml deleted file mode 100644 index 3cf5e788..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following 
are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml deleted file mode 100644 index b8fa3952..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml deleted file mode 100644 index 9c407ec0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml deleted file mode 100644 index eb0e9358..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_03_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml deleted file mode 100644 index 95df7e0a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml deleted file mode 100644 index 887fbe07..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml deleted file mode 100644 index e25c7b5c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_03_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml deleted file mode 100644 index 9765f81d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml deleted file mode 100644 index 505a0e83..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml deleted file mode 100644 index 01ec11cb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_microeconomics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml deleted file mode 100644 index 804a4925..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml deleted file mode 100644 index 759293a7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml deleted file mode 100644 index f9b4593e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml deleted file mode 100644 index dad11f7d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml deleted file mode 100644 index ef5bb47a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml deleted file mode 100644 index 240e0b83..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml deleted file mode 100644 index e90be5bd..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml deleted file mode 100644 index 24fc0971..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml deleted file mode 100644 index 0936c265..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml deleted file mode 100644 index f4e0f297..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about 
logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml deleted file mode 100644 index 271e9eb6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml deleted file mode 100644 index 1bc0eb54..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml deleted file mode 100644 index 0255fa98..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml deleted file 
mode 100644 index 18fce675..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml deleted file mode 100644 index 4d36997b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml deleted file mode 100644 index ffca74f0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml deleted file mode 100644 index 182287dc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with 
answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml deleted file mode 100644 index aca00704..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml deleted file mode 100644 index 08b87d0f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml deleted file mode 100644 index f526b91e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml deleted file mode 100644 
index 0062e96b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml deleted file mode 100644 index 9e597455..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml deleted file mode 100644 index 820f26ac..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml deleted file mode 100644 index cbceb92e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml deleted file mode 100644 index 500746af..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml deleted file mode 100644 index 62d1409a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml deleted file mode 100644 index b08ed0d7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml deleted file mode 100644 index 94c05159..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml deleted file mode 100644 index 1f9e8964..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml deleted file mode 100644 index 92548c68..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_03/style_03_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_03_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml deleted file mode 100644 index c0dfb525..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_04_lo -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Q: {{question.strip()}}\nA, {{choices[0]}}\nB, {{choices[1]}}\nC, {{choices[2]}}\nD, {{choices[3]}}\nA: " -doc_to_choice: ['A,', 'B,', 'C,', 'D,'] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml deleted file mode 100644 index 8d2f8098..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml deleted file mode 100644 index e26117a4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml deleted file mode 100644 index 3998c320..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml deleted file mode 100644 index 3ffa155e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml deleted file mode 100644 index 69ce9fa4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml deleted file mode 100644 index ecc2125d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml deleted file mode 100644 index 7c060ed8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml deleted file mode 100644 index 90ff5939..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml deleted file mode 100644 index 2bded9b5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml deleted file mode 100644 index 37e93c56..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml deleted file mode 100644 index 75722ef1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml deleted file mode 100644 index 6698f17f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml deleted file mode 100644 index f1810d32..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml deleted file mode 100644 index b9f5a6e3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml deleted file mode 100644 index 2e5b0d36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml deleted file mode 100644 index c56e5058..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The 
following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml deleted file mode 100644 index 5986c2ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml deleted file mode 100644 index 84e3b7f8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml deleted file mode 100644 index 07c716b0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml deleted file mode 100644 index e71aa530..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml deleted file mode 100644 index 40b56a11..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml deleted file mode 100644 index dc6021fc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_european_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml deleted file mode 100644 index 1034fe77..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml deleted file mode 100644 index 3b8857ec..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml deleted file mode 100644 index 4568abab..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_macroeconomics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml deleted file mode 100644 index 704025a1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml deleted file mode 100644 index 4870affe..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml deleted file mode 100644 index 95aa699e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml deleted file mode 100644 index 3ea1114b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml deleted file mode 100644 index 4c80a489..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml deleted file mode 100644 index ccd9fbcb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml deleted file mode 
100644 index 40f70676..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml deleted file mode 100644 index 65510f56..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml deleted file mode 100644 index aff1870e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml deleted file mode 100644 index 17333211..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following 
are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml deleted file mode 100644 index 295f77ad..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml deleted file mode 100644 index 76f66318..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml deleted file mode 100644 index 1acaf925..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml deleted file mode 100644 index 478800d6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml deleted file mode 100644 index ca1290aa..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml deleted file mode 100644 index a20091a3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml deleted file mode 100644 index 75486c12..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml deleted file mode 100644 index c7b6dc5b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml deleted file mode 100644 index 6c5214d8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml deleted file mode 100644 index 82696658..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" 
-"task": "mmlu_style_04_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml deleted file mode 100644 index b1c5c0ef..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml deleted file mode 100644 index d9833825..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml deleted file mode 100644 index 28a00e14..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml deleted file mode 100644 
index c9a9be10..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml deleted file mode 100644 index 6696d250..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml deleted file mode 100644 index f5ea9d3c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml deleted file mode 100644 index 9eb5b35b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml deleted file mode 100644 index 427c7679..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml deleted file mode 100644 index 2d00d5c9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml deleted file mode 100644 index 2f6c7243..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml deleted file mode 100644 index ce0cd658..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml deleted file mode 100644 index 9828954e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_04/style_04_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_04_lo_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml deleted file mode 100644 index cf0c1ed3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/_template_yaml +++ /dev/null @@ -1,13 +0,0 @@ -group: mmlu_style_05_lo -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: "Answer the following question with fruit that represents the options: {{question.strip()}}\n(Apple) {{choices[0]}}\n(Banana) {{choices[1]}}\n(Cantaloupe) {{choices[2]}}\n(Durian) {{choices[3]}}\nAnswer: " -doc_to_choice: ['(Apple)', '(Banana)', '(Cantaloupe)', '(Durian)'] -doc_to_target: answer -metric_list: - - metric: acc - aggregation: mean - higher_is_better: true - - metric: brier_score diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml deleted file mode 100644 index 4b032a41..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml deleted file mode 100644 index a634aced..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml deleted file mode 100644 index 1701ab8f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml deleted file mode 100644 index 317c27bf..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml deleted file mode 100644 index 7694fa1a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml deleted file mode 100644 index 8ef40714..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml deleted file mode 100644 index 805f9fa4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with 
answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml deleted file mode 100644 index bb51328c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml deleted file mode 100644 index 19d655f0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml deleted file mode 100644 index b99d2df0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_medicine" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml deleted file mode 100644 index f5f13fbc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml deleted file mode 100644 index 54c44811..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml deleted file mode 100644 index 90678e03..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml deleted file mode 100644 index 
471530eb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml deleted file mode 100644 index 0dc9e1aa..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml deleted file mode 100644 index f2e08287..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml deleted file mode 100644 index 7d965de1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" 
-"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml deleted file mode 100644 index 4854f37b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml deleted file mode 100644 index eef05492..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml deleted file mode 100644 index fbfaf14e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_chemistry" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml deleted file mode 100644 index fd2e7d92..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml deleted file mode 100644 index 0162142a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml deleted file mode 100644 index 4477cce5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml deleted file mode 100644 index de1aab96..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml deleted file mode 100644 index e0a9e76f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml deleted file mode 100644 index b5d2a35e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml deleted file mode 100644 index 652a999a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml deleted file mode 100644 index bd9223c6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml deleted file mode 100644 index 11d44aa7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml deleted file mode 100644 index 25ec5d75..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml deleted file mode 100644 index 637beb83..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml deleted file mode 100644 index 0167efc4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml deleted file mode 100644 
index 455c9d1c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml deleted file mode 100644 index 70e40d7f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml deleted file mode 100644 index 4b26da35..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml deleted file mode 100644 index efeef8f1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) 
about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml deleted file mode 100644 index 5495dd43..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml deleted file mode 100644 index ff028543..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml deleted file mode 100644 index d306e0dd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml deleted file mode 100644 index 2843c86a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml deleted file mode 100644 index 056600b1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml deleted file mode 100644 index e33009b6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml deleted file mode 100644 index 89565096..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": 
"moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml deleted file mode 100644 index 02d93244..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml deleted file mode 100644 index 056cb1be..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml deleted file mode 100644 index 83195d6e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml deleted file mode 100644 index ed40e6cb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml deleted file mode 100644 index f9450fe7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml deleted file mode 100644 index d2d2b1e4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml deleted file mode 100644 index bc808a89..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml deleted file mode 100644 index bc5a4d36..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml deleted file mode 100644 index 62afb7b4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml deleted file mode 100644 index bf2bb7c2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" 
-"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml deleted file mode 100644 index 801db8ce..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml deleted file mode 100644 index 8402d83d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml deleted file mode 100644 index 9cf4d92b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml deleted file mode 100644 index 6697992e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/letters_only/style_05/style_05_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_05_lo_world_religions" -- GitLab From cc572624746bd2e28b75ea7e8e32f1a9cee6b94c Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 6 Dec 2023 08:23:10 +0000 Subject: [PATCH 20/50] add style 01 --- .../style_01/a/_template_yaml | 15 ++++ .../style_01/a/style_01_abstract_algebra.yaml | 4 + .../style_01/a/style_01_anatomy.yaml | 4 + .../style_01/a/style_01_astronomy.yaml | 4 + .../style_01/a/style_01_business_ethics.yaml | 4 + .../a/style_01_clinical_knowledge.yaml | 4 + .../style_01/a/style_01_college_biology.yaml | 4 + .../a/style_01_college_chemistry.yaml | 4 + .../a/style_01_college_computer_science.yaml | 4 + .../a/style_01_college_mathematics.yaml | 4 + .../style_01/a/style_01_college_medicine.yaml | 4 + .../style_01/a/style_01_college_physics.yaml | 4 + .../a/style_01_computer_security.yaml | 4 + .../a/style_01_conceptual_physics.yaml | 4 + .../style_01/a/style_01_econometrics.yaml | 4 + .../a/style_01_electrical_engineering.yaml | 4 + .../a/style_01_elementary_mathematics.yaml | 4 + .../style_01/a/style_01_formal_logic.yaml | 4 + .../style_01/a/style_01_global_facts.yaml | 4 + .../a/style_01_high_school_biology.yaml | 4 + .../a/style_01_high_school_chemistry.yaml | 4 + ...style_01_high_school_computer_science.yaml | 4 + ...style_01_high_school_european_history.yaml | 4 + .../a/style_01_high_school_geography.yaml | 4 + ...1_high_school_government_and_politics.yaml | 4 + .../style_01_high_school_macroeconomics.yaml | 4 + .../a/style_01_high_school_mathematics.yaml | 4 + 
.../style_01_high_school_microeconomics.yaml | 4 + .../a/style_01_high_school_physics.yaml | 4 + .../a/style_01_high_school_psychology.yaml | 4 + .../a/style_01_high_school_statistics.yaml | 4 + .../a/style_01_high_school_us_history.yaml | 4 + .../a/style_01_high_school_world_history.yaml | 4 + .../style_01/a/style_01_human_aging.yaml | 4 + .../style_01/a/style_01_human_sexuality.yaml | 4 + .../a/style_01_international_law.yaml | 4 + .../style_01/a/style_01_jurisprudence.yaml | 4 + .../a/style_01_logical_fallacies.yaml | 4 + .../style_01/a/style_01_machine_learning.yaml | 4 + .../style_01/a/style_01_management.yaml | 4 + .../style_01/a/style_01_marketing.yaml | 4 + .../style_01/a/style_01_medical_genetics.yaml | 4 + .../style_01/a/style_01_miscellaneous.yaml | 4 + .../style_01/a/style_01_moral_disputes.yaml | 4 + .../style_01/a/style_01_moral_scenarios.yaml | 4 + .../style_01/a/style_01_nutrition.yaml | 4 + .../style_01/a/style_01_philosophy.yaml | 4 + .../style_01/a/style_01_prehistory.yaml | 4 + .../a/style_01_professional_accounting.yaml | 4 + .../style_01/a/style_01_professional_law.yaml | 4 + .../a/style_01_professional_medicine.yaml | 4 + .../a/style_01_professional_psychology.yaml | 4 + .../style_01/a/style_01_public_relations.yaml | 4 + .../style_01/a/style_01_security_studies.yaml | 4 + .../style_01/a/style_01_sociology.yaml | 4 + .../a/style_01_us_foreign_policy.yaml | 4 + .../style_01/a/style_01_virology.yaml | 4 + .../style_01/a/style_01_world_religions.yaml | 4 + .../style_01/b/_template_yaml | 15 ++++ .../style_01/b/style_01_abstract_algebra.yaml | 4 + .../style_01/b/style_01_anatomy.yaml | 4 + .../style_01/b/style_01_astronomy.yaml | 4 + .../style_01/b/style_01_business_ethics.yaml | 4 + .../b/style_01_clinical_knowledge.yaml | 4 + .../style_01/b/style_01_college_biology.yaml | 4 + .../b/style_01_college_chemistry.yaml | 4 + .../b/style_01_college_computer_science.yaml | 4 + .../b/style_01_college_mathematics.yaml | 4 + 
.../style_01/b/style_01_college_medicine.yaml | 4 + .../style_01/b/style_01_college_physics.yaml | 4 + .../b/style_01_computer_security.yaml | 4 + .../b/style_01_conceptual_physics.yaml | 4 + .../style_01/b/style_01_econometrics.yaml | 4 + .../b/style_01_electrical_engineering.yaml | 4 + .../b/style_01_elementary_mathematics.yaml | 4 + .../style_01/b/style_01_formal_logic.yaml | 4 + .../style_01/b/style_01_global_facts.yaml | 4 + .../b/style_01_high_school_biology.yaml | 4 + .../b/style_01_high_school_chemistry.yaml | 4 + ...style_01_high_school_computer_science.yaml | 4 + ...style_01_high_school_european_history.yaml | 4 + .../b/style_01_high_school_geography.yaml | 4 + ...1_high_school_government_and_politics.yaml | 4 + .../style_01_high_school_macroeconomics.yaml | 4 + .../b/style_01_high_school_mathematics.yaml | 4 + .../style_01_high_school_microeconomics.yaml | 4 + .../b/style_01_high_school_physics.yaml | 4 + .../b/style_01_high_school_psychology.yaml | 4 + .../b/style_01_high_school_statistics.yaml | 4 + .../b/style_01_high_school_us_history.yaml | 4 + .../b/style_01_high_school_world_history.yaml | 4 + .../style_01/b/style_01_human_aging.yaml | 4 + .../style_01/b/style_01_human_sexuality.yaml | 4 + .../b/style_01_international_law.yaml | 4 + .../style_01/b/style_01_jurisprudence.yaml | 4 + .../b/style_01_logical_fallacies.yaml | 4 + .../style_01/b/style_01_machine_learning.yaml | 4 + .../style_01/b/style_01_management.yaml | 4 + .../style_01/b/style_01_marketing.yaml | 4 + .../style_01/b/style_01_medical_genetics.yaml | 4 + .../style_01/b/style_01_miscellaneous.yaml | 4 + .../style_01/b/style_01_moral_disputes.yaml | 4 + .../style_01/b/style_01_moral_scenarios.yaml | 4 + .../style_01/b/style_01_nutrition.yaml | 4 + .../style_01/b/style_01_philosophy.yaml | 4 + .../style_01/b/style_01_prehistory.yaml | 4 + .../b/style_01_professional_accounting.yaml | 4 + .../style_01/b/style_01_professional_law.yaml | 4 + .../b/style_01_professional_medicine.yaml | 4 + 
.../b/style_01_professional_psychology.yaml | 4 + .../style_01/b/style_01_public_relations.yaml | 4 + .../style_01/b/style_01_security_studies.yaml | 4 + .../style_01/b/style_01_sociology.yaml | 4 + .../b/style_01_us_foreign_policy.yaml | 4 + .../style_01/b/style_01_virology.yaml | 4 + .../style_01/b/style_01_world_religions.yaml | 4 + .../style_01/c/_template_yaml | 15 ++++ .../style_01/c/style_01_abstract_algebra.yaml | 4 + .../style_01/c/style_01_anatomy.yaml | 4 + .../style_01/c/style_01_astronomy.yaml | 4 + .../style_01/c/style_01_business_ethics.yaml | 4 + .../c/style_01_clinical_knowledge.yaml | 4 + .../style_01/c/style_01_college_biology.yaml | 4 + .../c/style_01_college_chemistry.yaml | 4 + .../c/style_01_college_computer_science.yaml | 4 + .../c/style_01_college_mathematics.yaml | 4 + .../style_01/c/style_01_college_medicine.yaml | 4 + .../style_01/c/style_01_college_physics.yaml | 4 + .../c/style_01_computer_security.yaml | 4 + .../c/style_01_conceptual_physics.yaml | 4 + .../style_01/c/style_01_econometrics.yaml | 4 + .../c/style_01_electrical_engineering.yaml | 4 + .../c/style_01_elementary_mathematics.yaml | 4 + .../style_01/c/style_01_formal_logic.yaml | 4 + .../style_01/c/style_01_global_facts.yaml | 4 + .../c/style_01_high_school_biology.yaml | 4 + .../c/style_01_high_school_chemistry.yaml | 4 + ...style_01_high_school_computer_science.yaml | 4 + ...style_01_high_school_european_history.yaml | 4 + .../c/style_01_high_school_geography.yaml | 4 + ...1_high_school_government_and_politics.yaml | 4 + .../style_01_high_school_macroeconomics.yaml | 4 + .../c/style_01_high_school_mathematics.yaml | 4 + .../style_01_high_school_microeconomics.yaml | 4 + .../c/style_01_high_school_physics.yaml | 4 + .../c/style_01_high_school_psychology.yaml | 4 + .../c/style_01_high_school_statistics.yaml | 4 + .../c/style_01_high_school_us_history.yaml | 4 + .../c/style_01_high_school_world_history.yaml | 4 + .../style_01/c/style_01_human_aging.yaml | 4 + 
.../style_01/c/style_01_human_sexuality.yaml | 4 + .../c/style_01_international_law.yaml | 4 + .../style_01/c/style_01_jurisprudence.yaml | 4 + .../c/style_01_logical_fallacies.yaml | 4 + .../style_01/c/style_01_machine_learning.yaml | 4 + .../style_01/c/style_01_management.yaml | 4 + .../style_01/c/style_01_marketing.yaml | 4 + .../style_01/c/style_01_medical_genetics.yaml | 4 + .../style_01/c/style_01_miscellaneous.yaml | 4 + .../style_01/c/style_01_moral_disputes.yaml | 4 + .../style_01/c/style_01_moral_scenarios.yaml | 4 + .../style_01/c/style_01_nutrition.yaml | 4 + .../style_01/c/style_01_philosophy.yaml | 4 + .../style_01/c/style_01_prehistory.yaml | 4 + .../c/style_01_professional_accounting.yaml | 4 + .../style_01/c/style_01_professional_law.yaml | 4 + .../c/style_01_professional_medicine.yaml | 4 + .../c/style_01_professional_psychology.yaml | 4 + .../style_01/c/style_01_public_relations.yaml | 4 + .../style_01/c/style_01_security_studies.yaml | 4 + .../style_01/c/style_01_sociology.yaml | 4 + .../c/style_01_us_foreign_policy.yaml | 4 + .../style_01/c/style_01_virology.yaml | 4 + .../style_01/c/style_01_world_religions.yaml | 4 + .../tasks/mmlu/alternative_worlds/styles.py | 86 +++++++++++++++++++ 175 files changed, 815 insertions(+) create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/styles.py diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml new file mode 100644 index 00000000..9b01f2d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_01 +group_alias: style_01 +task: mmlu_style_01a +task_alias: a +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../styles.template_01 +doc_to_choice: !function ../../styles.choice_01a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml new file mode 100644 index 00000000..adbde88f --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml new file mode 100644 index 00000000..d0ac8cbf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml new file mode 100644 index 00000000..c4d6fc38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml new file mode 100644 index 00000000..41139a48 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml new file mode 100644 index 00000000..0741143c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml new file mode 100644 index 00000000..7e95f6a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml new file mode 100644 index 00000000..18bf8054 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml new file mode 100644 index 00000000..93250def --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The 
following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml new file mode 100644 index 00000000..a1c2c7be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml new file mode 100644 index 00000000..ff64eb39 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml new file mode 100644 index 00000000..f4dcdf2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml new file mode 100644 index 00000000..d84981a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml new file mode 100644 index 00000000..bb859070 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml new file mode 100644 index 00000000..abfef42f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml new file mode 100644 index 00000000..1e20ea86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice 
questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml new file mode 100644 index 00000000..4e139219 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml new file mode 100644 index 00000000..5e8aab5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml new file mode 100644 index 00000000..7d0aaee0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml new file mode 100644 index 
00000000..2cf24f96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml new file mode 100644 index 00000000..32700f8e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml new file mode 100644 index 00000000..b2463f14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml new file mode 100644 index 00000000..62fe240d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": 
"The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml new file mode 100644 index 00000000..ea07f428 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml new file mode 100644 index 00000000..7dc6ba9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b8c2f90b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_01a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml new file mode 100644 index 00000000..02d4aa28 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml new file mode 100644 index 00000000..3c64337f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml new file mode 100644 index 00000000..8dd5fc59 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml new file mode 100644 index 00000000..650cd3de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml new file mode 100644 index 00000000..2bd87285 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml new file mode 100644 index 00000000..287e5479 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml new file mode 100644 index 00000000..a2d8b18a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml new file mode 100644 index 00000000..6226957b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml new file mode 100644 index 00000000..e8acd6f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml new file mode 100644 index 00000000..e1336f1e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml new file mode 100644 index 00000000..a2f5f14c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml new file mode 100644 index 00000000..51d83783 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml new file mode 100644 index 00000000..5912210e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml new file mode 100644 index 00000000..6e8ed63f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The 
following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml new file mode 100644 index 00000000..0e437a32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml new file mode 100644 index 00000000..00734846 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml new file mode 100644 index 00000000..3dcf6d92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml new file mode 100644 index 00000000..a9fe3cfd --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml new file mode 100644 index 00000000..7be4e0e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml new file mode 100644 index 00000000..462b97fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml new file mode 100644 index 00000000..60de6896 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml new file mode 100644 index 00000000..4cd9185e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml new file mode 100644 index 00000000..d44ae986 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml new file mode 100644 index 00000000..0c6252b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml new file mode 100644 index 00000000..dbe8ce5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are 
multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml new file mode 100644 index 00000000..b1508047 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml new file mode 100644 index 00000000..6b5bd12c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml new file mode 100644 index 00000000..8214bf4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml new file mode 100644 index 00000000..5eecad45 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml new file mode 100644 index 00000000..fbbdcdb0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml new file mode 100644 index 00000000..97d76e17 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml new file mode 100644 index 00000000..394792f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_01a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml new file mode 100644 index 00000000..888b0f9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_01 +group_alias: style_01 +task: mmlu_style_01b +task_alias: b +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../styles.template_01 +doc_to_choice: !function ../../styles.choice_01b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml new file mode 100644 index 00000000..fb1593c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml new file mode 100644 index 00000000..095e2962 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml new file mode 100644 index 00000000..6c8300d9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml new file mode 100644 index 00000000..d18a16ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml new file mode 100644 index 00000000..7d7ff4eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml new file mode 100644 index 00000000..63e56071 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml new file mode 100644 index 00000000..165f1109 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml new file mode 100644 index 00000000..f9be6bc0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml new file mode 100644 index 00000000..f424c5b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml new file mode 100644 index 00000000..99dd5539 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml new file mode 100644 index 00000000..f86c0510 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml new file mode 100644 index 00000000..4bb06a83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml new file mode 100644 index 00000000..8d28fd03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_conceptual_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml new file mode 100644 index 00000000..a5e5039f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml new file mode 100644 index 00000000..99b66fd1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml new file mode 100644 index 00000000..abbc36ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml new file mode 100644 index 00000000..1c062948 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml @@ 
-0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml new file mode 100644 index 00000000..9ae058c8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml new file mode 100644 index 00000000..5c3557c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml new file mode 100644 index 00000000..cab37323 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml new file mode 100644 index 00000000..4cabb368 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml new file mode 100644 index 00000000..c84b7abd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml new file mode 100644 index 00000000..740c8de5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml new file mode 100644 index 00000000..9ec4827a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml new file mode 100644 index 00000000..eb4c62a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml new file mode 100644 index 00000000..eff1253f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml new file mode 100644 index 00000000..a27ba3e7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml new file mode 100644 index 00000000..07ddfae7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml new file mode 100644 index 00000000..0b602c8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml new file mode 100644 index 00000000..352b036a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_statistics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml new file mode 100644 index 00000000..55f7521c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml new file mode 100644 index 00000000..5f525bd1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml new file mode 100644 index 00000000..f6d0d789 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml new file mode 100644 index 00000000..fba8f734 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml new file mode 100644 index 00000000..c3bb9083 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml new file mode 100644 index 00000000..2bd61c14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml new file mode 100644 index 00000000..4aa1173e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml new file mode 100644 index 00000000..7e7e244a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml new file mode 100644 index 00000000..76c5df3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml new file mode 100644 index 00000000..0d5e4a76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml @@ -0,0 +1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml new file mode 100644 index 00000000..6de75a5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions 
(with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml new file mode 100644 index 00000000..c0f829fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml new file mode 100644 index 00000000..41d0bff3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml new file mode 100644 index 00000000..d87aa1c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml new file mode 100644 index 00000000..3a8e8af7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml new file mode 100644 index 00000000..0d2a03e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml new file mode 100644 index 00000000..58e73666 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml new file mode 100644 index 00000000..5e26a638 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml new file mode 100644 index 00000000..672af7d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml new file mode 100644 index 00000000..f9f5622c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml new file mode 100644 index 00000000..7a866885 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml new file mode 100644 index 00000000..ba58dd4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml new file mode 100644 index 00000000..9dd237f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml new file mode 100644 index 00000000..105075a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml new file mode 100644 index 00000000..fff71529 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml new file mode 100644 index 
00000000..778b5d3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml new file mode 100644 index 00000000..81c7f5f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml new file mode 100644 index 00000000..04c85bb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml @@ -0,0 +1,15 @@ +group: mmlu_style_01 +group_alias: style_01 +task: mmlu_style_01c +task_alias: c +dataset_path: cais/mmlu +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../styles.template_01 +doc_to_choice: !function ../../styles.choice_01c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml new file mode 100644 index 00000000..1d344a0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml @@ -0,0 +1,4 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_01c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml new file mode 100644 index 00000000..b4bb37b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml new file mode 100644 index 00000000..0b63fd38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml new file mode 100644 index 00000000..0ef5de0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml new file mode 100644 index 00000000..4d8bcb85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml new file mode 100644 index 00000000..68ca5d9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college biology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml new file mode 100644 index 00000000..a15634ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml new file mode 100644 index 00000000..5a176107 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml new file mode 100644 index 00000000..1400ca3a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml new file mode 100644 index 00000000..63931106 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml new file mode 100644 index 00000000..2a8f60ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml new file mode 100644 index 00000000..4e1b6733 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml @@ -0,0 +1,4 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice 
questions (with answers) about computer security.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml new file mode 100644 index 00000000..b6bb8329 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml new file mode 100644 index 00000000..93d0af1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml new file mode 100644 index 00000000..02ada01a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml @@ -0,0 +1,4 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml new file 
mode 100644 index 00000000..8b6c4377 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml new file mode 100644 index 00000000..33195cfa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml @@ -0,0 +1,4 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml new file mode 100644 index 00000000..d2e391b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml @@ -0,0 +1,4 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global facts.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml new file mode 100644 index 00000000..f776e3af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" +"include": "_template_yaml" +"task": 
"mmlu_style_01c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml new file mode 100644 index 00000000..a0abfc1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml new file mode 100644 index 00000000..b6730455 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml new file mode 100644 index 00000000..8a0c0450 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml new file mode 100644 index 00000000..551cbd0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml new file mode 100644 index 00000000..007b67a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml new file mode 100644 index 00000000..339f090a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml new file mode 100644 index 00000000..298c23df --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml new file mode 100644 index 00000000..de821453 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml new file mode 100644 index 00000000..8c38cf29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml new file mode 100644 index 00000000..a2c3f8c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with 
answers) about high school psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml new file mode 100644 index 00000000..1e2d0ce9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml new file mode 100644 index 00000000..5b6a3639 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml new file mode 100644 index 00000000..807e01d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml @@ -0,0 +1,4 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml new file mode 100644 index 00000000..4a05e4ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human aging.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml new file mode 100644 index 00000000..7d4f87dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml @@ -0,0 +1,4 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml new file mode 100644 index 00000000..4c87ca95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml new file mode 100644 index 00000000..f893df3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml @@ -0,0 +1,4 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" 
+"include": "_template_yaml" +"task": "mmlu_style_01c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml new file mode 100644 index 00000000..d845ed1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml new file mode 100644 index 00000000..de7bae42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml @@ -0,0 +1,4 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml new file mode 100644 index 00000000..693b6efe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml @@ -0,0 +1,4 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml new file mode 100644 index 00000000..6d999b72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml @@ -0,0 
+1,4 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml new file mode 100644 index 00000000..0693416e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml @@ -0,0 +1,4 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml new file mode 100644 index 00000000..49b669ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml @@ -0,0 +1,4 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml new file mode 100644 index 00000000..bfc9e4e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml new 
file mode 100644 index 00000000..f8dd7efb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml @@ -0,0 +1,4 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml new file mode 100644 index 00000000..703db244 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml @@ -0,0 +1,4 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml new file mode 100644 index 00000000..cbf41b70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml new file mode 100644 index 00000000..c109226c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml @@ -0,0 +1,4 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_prehistory" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml new file mode 100644 index 00000000..9ef8738b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml new file mode 100644 index 00000000..f748683f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional law.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml new file mode 100644 index 00000000..077c4575 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml new file mode 100644 index 00000000..8bcbac84 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml new file mode 100644 index 00000000..5509e50a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml @@ -0,0 +1,4 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public relations.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml new file mode 100644 index 00000000..f8a0cf58 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml @@ -0,0 +1,4 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security studies.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml new file mode 100644 index 00000000..9ede980b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml new file mode 100644 index 00000000..51e28c67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml @@ -0,0 +1,4 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml new file mode 100644 index 00000000..7c0d1acb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml @@ -0,0 +1,4 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml new file mode 100644 index 00000000..706aa92f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml @@ -0,0 +1,4 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world religions.\n\n" +"include": "_template_yaml" +"task": "mmlu_style_01c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/styles.py b/lm_eval/tasks/mmlu/alternative_worlds/styles.py new file mode 100644 index 00000000..275503c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/styles.py @@ -0,0 +1,86 @@ +import string +from functools import partial + +def doc_to_text_base(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list 
= [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n\n".join([ + "Question: "+doc["question"].strip()+"\nAnswer:", + ] + [ + choice_string.format(i,j) for i,j in zip(letter_list, choices) + ] + ) + + return doc_to_text + +# Full continuation +def choice_A(doc): + return doc["choices"]["text"] + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t","") for letter in letter_list] + + return letter_list + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter+" " for letter in letter_list] + + return [letter+choice for letter, choice in zip(letter_list, choices)] + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, 
string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") + + -- GitLab From c74e2761d031b9c32c93ba37dd174c2c9b1abdaa Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 6 Dec 2023 08:25:58 +0000 Subject: [PATCH 21/50] reformat --- lm_eval/api/metrics.py | 4 ++-- lm_eval/api/task.py | 2 +- .../tasks/arc/alternative_worlds/README.md | 4 ++-- .../arc/alternative_worlds/style_01/a.yaml | 4 ++-- .../arc/alternative_worlds/style_01/b.yaml | 4 ++-- .../arc/alternative_worlds/style_01/c.yaml | 4 ++-- .../arc/alternative_worlds/style_02/a.yaml | 4 ++-- .../arc/alternative_worlds/style_02/b.yaml | 4 ++-- .../arc/alternative_worlds/style_02/c.yaml | 4 ++-- .../arc/alternative_worlds/style_03/a.yaml | 4 ++-- .../arc/alternative_worlds/style_03/b.yaml | 4 ++-- .../arc/alternative_worlds/style_03/c.yaml | 4 ++-- .../arc/alternative_worlds/style_04/a.yaml | 4 ++-- .../arc/alternative_worlds/style_04/b.yaml | 4 ++-- .../arc/alternative_worlds/style_04/c.yaml | 4 ++-- .../arc/alternative_worlds/style_05/a.yaml | 4 ++-- .../arc/alternative_worlds/style_05/b.yaml | 4 ++-- .../arc/alternative_worlds/style_05/c.yaml | 4 ++-- .../arc/alternative_worlds/style_06/a.yaml | 4 ++-- 
.../arc/alternative_worlds/style_06/b.yaml | 4 ++-- .../arc/alternative_worlds/style_06/c.yaml | 4 ++-- .../arc/alternative_worlds/style_07/a.yaml | 4 ++-- .../arc/alternative_worlds/style_07/b.yaml | 4 ++-- .../arc/alternative_worlds/style_07/c.yaml | 4 ++-- .../arc/alternative_worlds/style_08/a.yaml | 4 ++-- .../arc/alternative_worlds/style_08/b.yaml | 4 ++-- .../arc/alternative_worlds/style_08/c.yaml | 4 ++-- .../tasks/arc/alternative_worlds/styles.py | 21 +++++++++-------- .../alternative_worlds/_template_yaml | 2 +- .../alternative_worlds/arithmetic_alt.yaml | 2 +- .../style_00/_template_00_yaml | 2 +- .../style_00/arithmetic_1dc.yaml | 2 +- .../style_00/arithmetic_2da.yaml | 2 +- .../style_00/arithmetic_2dm.yaml | 2 +- .../style_00/arithmetic_2ds.yaml | 2 +- .../style_00/arithmetic_3da.yaml | 2 +- .../style_00/arithmetic_3ds.yaml | 2 +- .../style_00/arithmetic_4da.yaml | 2 +- .../style_00/arithmetic_4ds.yaml | 2 +- .../style_00/arithmetic_5da.yaml | 2 +- .../style_00/arithmetic_5ds.yaml | 2 +- .../style_01/_template_01_yaml | 2 +- .../style_01/arithmetic_1dc.yaml | 2 +- .../style_01/arithmetic_2da.yaml | 2 +- .../style_01/arithmetic_2dm.yaml | 2 +- .../style_01/arithmetic_2ds.yaml | 2 +- .../style_01/arithmetic_3da.yaml | 2 +- .../style_01/arithmetic_3ds.yaml | 2 +- .../style_01/arithmetic_4da.yaml | 2 +- .../style_01/arithmetic_4ds.yaml | 2 +- .../style_01/arithmetic_5da.yaml | 2 +- .../style_01/arithmetic_5ds.yaml | 2 +- .../style_02/_template_02_yaml | 2 +- .../style_02/arithmetic_1dc.yaml | 2 +- .../style_02/arithmetic_2da.yaml | 2 +- .../style_02/arithmetic_2dm.yaml | 2 +- .../style_02/arithmetic_2ds.yaml | 2 +- .../style_02/arithmetic_3da.yaml | 2 +- .../style_02/arithmetic_3ds.yaml | 2 +- .../style_02/arithmetic_4da.yaml | 2 +- .../style_02/arithmetic_4ds.yaml | 2 +- .../style_02/arithmetic_5da.yaml | 2 +- .../style_02/arithmetic_5ds.yaml | 2 +- .../style_03/_template_03_yaml | 2 +- .../style_03/arithmetic_1dc.yaml | 2 +- 
.../style_03/arithmetic_2da.yaml | 2 +- .../style_03/arithmetic_2dm.yaml | 2 +- .../style_03/arithmetic_2ds.yaml | 2 +- .../style_03/arithmetic_3da.yaml | 2 +- .../style_03/arithmetic_3ds.yaml | 2 +- .../style_03/arithmetic_4da.yaml | 2 +- .../style_03/arithmetic_4ds.yaml | 2 +- .../style_03/arithmetic_5da.yaml | 2 +- .../style_03/arithmetic_5ds.yaml | 2 +- .../style_04/_template_04_yaml | 2 +- .../style_04/arithmetic_1dc.yaml | 2 +- .../style_04/arithmetic_2da.yaml | 2 +- .../style_04/arithmetic_2dm.yaml | 2 +- .../style_04/arithmetic_2ds.yaml | 2 +- .../style_04/arithmetic_3da.yaml | 2 +- .../style_04/arithmetic_3ds.yaml | 2 +- .../style_04/arithmetic_4da.yaml | 2 +- .../style_04/arithmetic_4ds.yaml | 2 +- .../style_04/arithmetic_5da.yaml | 2 +- .../style_04/arithmetic_5ds.yaml | 2 +- .../style_05/_template_05_yaml | 2 +- .../style_05/arithmetic_1dc.yaml | 2 +- .../style_05/arithmetic_2da.yaml | 2 +- .../style_05/arithmetic_2dm.yaml | 2 +- .../style_05/arithmetic_2ds.yaml | 2 +- .../style_05/arithmetic_3da.yaml | 2 +- .../style_05/arithmetic_3ds.yaml | 2 +- .../style_05/arithmetic_4da.yaml | 2 +- .../style_05/arithmetic_4ds.yaml | 2 +- .../style_05/arithmetic_5da.yaml | 2 +- .../style_05/arithmetic_5ds.yaml | 2 +- .../arithmetic/alternative_worlds/utils.py | 11 +++++++-- .../hellaswag/alternative_worlds/README.md | 4 ++-- .../alternative_worlds/style_01/a.yaml | 4 ++-- .../alternative_worlds/style_01/b.yaml | 4 ++-- .../alternative_worlds/style_01/c.yaml | 4 ++-- .../alternative_worlds/style_02/a.yaml | 4 ++-- .../alternative_worlds/style_02/b.yaml | 4 ++-- .../alternative_worlds/style_02/c.yaml | 4 ++-- .../alternative_worlds/style_03/a.yaml | 4 ++-- .../alternative_worlds/style_03/b.yaml | 4 ++-- .../alternative_worlds/style_03/c.yaml | 4 ++-- .../alternative_worlds/style_04/a.yaml | 4 ++-- .../alternative_worlds/style_04/b.yaml | 4 ++-- .../alternative_worlds/style_04/c.yaml | 4 ++-- .../alternative_worlds/style_05/a.yaml | 4 ++-- 
.../alternative_worlds/style_05/b.yaml | 4 ++-- .../alternative_worlds/style_05/c.yaml | 4 ++-- .../alternative_worlds/style_06/a.yaml | 4 ++-- .../alternative_worlds/style_06/b.yaml | 4 ++-- .../alternative_worlds/style_06/c.yaml | 4 ++-- .../alternative_worlds/style_07/a.yaml | 4 ++-- .../alternative_worlds/style_07/b.yaml | 4 ++-- .../alternative_worlds/style_07/c.yaml | 4 ++-- .../alternative_worlds/style_08/a.yaml | 4 ++-- .../alternative_worlds/style_08/b.yaml | 4 ++-- .../alternative_worlds/style_08/c.yaml | 4 ++-- .../hellaswag/alternative_worlds/styles.py | 23 +++++++++++-------- .../tasks/mathqa/alternative_worlds/README.md | 4 ++-- .../mathqa/alternative_worlds/style_01/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_01/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_01/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_02/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_02/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_02/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_03/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_03/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_03/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_04/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_04/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_04/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_05/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_05/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_05/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_06/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_06/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_06/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_07/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_07/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_07/c.yaml | 4 ++-- .../mathqa/alternative_worlds/style_08/a.yaml | 4 ++-- .../mathqa/alternative_worlds/style_08/b.yaml | 4 ++-- .../mathqa/alternative_worlds/style_08/c.yaml | 4 ++-- 
.../tasks/mathqa/alternative_worlds/styles.py | 19 ++++++++------- .../mmlu_alternative_worlds_fc.yaml | 2 +- .../mmlu_alternative_worlds_lo.yaml | 2 +- .../style_01/a/_template_yaml | 2 +- .../style_01/b/_template_yaml | 2 +- .../style_01/c/_template_yaml | 2 +- .../tasks/mmlu/alternative_worlds/styles.py | 21 +++++++++-------- 155 files changed, 283 insertions(+), 264 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 2596da2a..27304cae 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -127,10 +127,10 @@ def brier_score(items): # This is a passthrough function _p = np.array(p) _g = np.array(g) _g_one_hot = np.eye(len(_p[0]))[_g] - average += np.mean(np.sum((_p - _g_one_hot) ** 2, axis=1))*len(_g) + average += np.mean(np.sum((_p - _g_one_hot) ** 2, axis=1)) * len(_g) total_size += len(_g) - return average/total_size + return average / total_size @register_metric( diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 70a140e0..5f8b1476 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1020,7 +1020,7 @@ class ConfigurableTask(Task): **({"perplexity": ll} if "perplexity" in use_metric else {}), **({"acc": int(is_greedy)} if "acc" in use_metric else {}), **( - {"brier_score": (0, [prob_norm])} # Gold is Index 0 + {"brier_score": (0, [prob_norm])} # Gold is Index 0 if "brier_score" in use_metric else {} ), diff --git a/lm_eval/tasks/arc/alternative_worlds/README.md b/lm_eval/tasks/arc/alternative_worlds/README.md index a9f58e69..93600ae1 100644 --- a/lm_eval/tasks/arc/alternative_worlds/README.md +++ b/lm_eval/tasks/arc/alternative_worlds/README.md @@ -15,6 +15,6 @@ Answer types: - original option - just letter - letters + continuation - - original option + - original option - just letter -- continuation \ No newline at end of file +- continuation diff --git a/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml index dbd35503..2d95b888 100644 --- 
a/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_01 -group_alias: style_01 +group_alias: style_01 task: arc_easy_01a task_alias: a doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01a \ No newline at end of file +doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml index 7834ce30..75fef77f 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_01 -group_alias: style_01 +group_alias: style_01 task: arc_easy_01b task_alias: b doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01b \ No newline at end of file +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml index 4b79e537..317233ac 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_01 -group_alias: style_01 +group_alias: style_01 task: arc_easy_01c task_alias: c doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01c \ No newline at end of file +doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml index 94096e17..7819b8bd 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_02 -group_alias: style_02 +group_alias: style_02 task: arc_easy_02a 
task_alias: a doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02a \ No newline at end of file +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml index 07606e37..3223a773 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_02 -group_alias: style_02 +group_alias: style_02 task: arc_easy_02b task_alias: b doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02b \ No newline at end of file +doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml index c024feaa..6b128406 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_02 -group_alias: style_02 +group_alias: style_02 task: arc_easy_02c task_alias: c doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02c \ No newline at end of file +doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml index b8b215ce..339f70d8 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_03 -group_alias: style_03 +group_alias: style_03 task: arc_easy_03a task_alias: a doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03a \ No newline at end of file +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml 
b/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml index b12fa32e..e700f628 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_03 -group_alias: style_03 +group_alias: style_03 task: arc_easy_03b task_alias: b doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03b \ No newline at end of file +doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml index b97d03b2..6f0feff8 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_03 -group_alias: style_03 +group_alias: style_03 task: arc_easy_03c task_alias: c doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03c \ No newline at end of file +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml index 383bd8ba..99578be0 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_04 -group_alias: style_04 +group_alias: style_04 task: arc_easy_04a task_alias: a doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04a \ No newline at end of file +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml index 82dc2df7..81edfbc4 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml @@ -1,7 +1,7 @@ include: 
../_arc_easy_alt_yaml group: arc_easy_04 -group_alias: style_04 +group_alias: style_04 task: arc_easy_04b task_alias: b doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04b \ No newline at end of file +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml index f21636c4..f970376a 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_04 -group_alias: style_04 +group_alias: style_04 task: arc_easy_04c task_alias: c doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04c \ No newline at end of file +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml index 15199212..00cd7aaa 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_05 -group_alias: style_05 +group_alias: style_05 task: arc_easy_05a task_alias: a doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05a \ No newline at end of file +doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml index fc88fa82..253e38e5 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_05 -group_alias: style_05 +group_alias: style_05 task: arc_easy_05b task_alias: b doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05b \ No newline at end of file 
+doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml index b234f0c3..6066248b 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_05 -group_alias: style_05 +group_alias: style_05 task: arc_easy_05c task_alias: c doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05c \ No newline at end of file +doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml index cc6b8105..1ddeb4f5 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_06 -group_alias: style_06 +group_alias: style_06 task: arc_easy_06a task_alias: a doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06a \ No newline at end of file +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml index e544e7dd..4ef19810 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_06 -group_alias: style_06 +group_alias: style_06 task: arc_easy_06b task_alias: b doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06b \ No newline at end of file +doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml index 048d0afc..0ce0c2da 100644 --- 
a/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_06 -group_alias: style_06 +group_alias: style_06 task: arc_easy_06c task_alias: c doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06c \ No newline at end of file +doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml index 8f1c8ff9..9229f190 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_07 -group_alias: style_07 +group_alias: style_07 task: arc_easy_07a task_alias: a doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07a \ No newline at end of file +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml index af3f7cc8..85d8017e 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_07 -group_alias: style_07 +group_alias: style_07 task: arc_easy_07b task_alias: b doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07b \ No newline at end of file +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml index c390ae3e..9eb8ba53 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_07 -group_alias: style_07 +group_alias: style_07 task: arc_easy_07c 
task_alias: c doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07c \ No newline at end of file +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml index 0a794fa7..37dcd2a7 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_08 -group_alias: style_08 +group_alias: style_08 task: arc_easy_08a task_alias: a doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08a \ No newline at end of file +doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml index afc40e90..0a7df350 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_08 -group_alias: style_08 +group_alias: style_08 task: arc_easy_08b task_alias: b doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08b \ No newline at end of file +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml index c84813c4..c94c41f4 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml @@ -1,7 +1,7 @@ include: ../_arc_easy_alt_yaml group: arc_easy_08 -group_alias: style_08 +group_alias: style_08 task: arc_easy_08c task_alias: c doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08c \ No newline at end of file +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/arc/alternative_worlds/styles.py 
b/lm_eval/tasks/arc/alternative_worlds/styles.py index c0cb5b12..3d075517 100644 --- a/lm_eval/tasks/arc/alternative_worlds/styles.py +++ b/lm_eval/tasks/arc/alternative_worlds/styles.py @@ -1,6 +1,7 @@ import string from functools import partial + def doc_to_text_base(alphabet, style, doc): choices = doc["choices"]["text"] @@ -13,19 +14,21 @@ def doc_to_text_base(alphabet, style, doc): else: choice_string = "{} {}" - doc_to_text = "\n\n".join([ - "Question: "+doc["question"]+"\nAnswer:", - ] + [ - choice_string.format(i,j) for i,j in zip(letter_list, choices) + doc_to_text = "\n\n".join( + [ + "Question: " + doc["question"] + "\nAnswer:", ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] ) return doc_to_text + # Full continuation def choice_A(doc): return doc["choices"]["text"] + # Letters only def choice_B(alphabet, style, doc): @@ -34,10 +37,11 @@ def choice_B(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" in style: - letter_list = [letter.replace("\t","") for letter in letter_list] + letter_list = [letter.replace("\t", "") for letter in letter_list] return letter_list + # Letters + Full continuation def choice_C(alphabet, style, doc): @@ -46,9 +50,10 @@ def choice_C(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" not in style: - letter_list = [letter+" " for letter in letter_list] + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] - return [letter+choice for letter, choice in zip(letter_list, choices)] template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") choice_01a = choice_A @@ -82,5 +87,3 @@ template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") choice_08a = choice_A choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") - - diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml index 16a5177e..b68001e1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/_template_yaml @@ -10,4 +10,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml index 8bcf1a02..1e678c04 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/arithmetic_alt.yaml @@ -5,4 +5,4 @@ task: - arithmetic_alt_02 - arithmetic_alt_03 - arithmetic_alt_04 - - arithmetic_alt_05 \ No newline at end of file + - arithmetic_alt_05 diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml index d8c4d719..43b73a88 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/_template_00_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml index 8b103a12..0c989c6f 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_1dc_alt_00 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml index 938aa422..11501c8f 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_2da_alt_00 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml index 8096691a..7a0d35b1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_2dm_alt_00 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml index 7e6a9a72..0720c833 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_2ds.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_2ds_alt_00 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml index 71b72218..3f6cf88a 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_3da_alt_00 dataset_name: arithmetic_3da -task_alias: 3da \ 
No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml index b2330c6e..a83dffe0 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_3ds_alt_00 dataset_name: arithmetic_3ds -task_alias: 3ds \ No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml index 8974feee..80999856 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_4da_alt_00 dataset_name: arithmetic_4da -task_alias: 4da \ No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml index d08c4aee..59a6e5a0 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_4ds_alt_00 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml index d8a81f25..f172341b 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: 
arithmetic_5da_alt_00 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml index 36b9e453..2da65545 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_00/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_00_yaml task: arithmetic_5ds_alt_00 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml index 60a1ee13..5f10c542 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/_template_01_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml index 0134f80f..2764ccb2 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_1dc_alt_01 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml index 2b3f8bd4..753eef6a 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_2da_alt_01 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml index 92b0521a..abbbecc4 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_2dm_alt_01 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml index b44a5556..3f069e28 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_2ds.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_2ds_alt_01 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml index 7f8e8b2c..6fa481f5 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_3da_alt_01 dataset_name: arithmetic_3da -task_alias: 3da \ No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml index eb604704..5e8d27b4 100644 --- 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_3ds_alt_01 dataset_name: arithmetic_3ds -task_alias: 3ds \ No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml index abad02c2..5564a98c 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_4da_alt_01 dataset_name: arithmetic_4da -task_alias: 4da \ No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml index 0b022c46..765400b6 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_4ds_alt_01 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml index 21e28815..44548858 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_5da_alt_01 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml index d3622a86..571bc80d 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_01/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_01_yaml task: arithmetic_5ds_alt_01 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml index 8db3ca7c..2e2a3f5c 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/_template_02_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml index 33f2b064..6d3184da 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_1dc_alt_02 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml index 4cf1304e..2a7a9e59 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_2da_alt_02 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml index 41ceaa84..30c38a6d 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_2dm_alt_02 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml index dbbd41dc..ab18bce0 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_2ds.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_2ds_alt_02 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml index e39181e8..e59c3798 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_3da_alt_02 dataset_name: arithmetic_3da -task_alias: 3da \ No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml index 5e643bcb..6d37ef2a 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_3ds_alt_02 dataset_name: arithmetic_3ds -task_alias: 3ds \ 
No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml index a57fbdff..eabbb1a0 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_4da_alt_02 dataset_name: arithmetic_4da -task_alias: 4da \ No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml index baef145d..8ce2bb79 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_4ds_alt_02 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml index f35f2c56..48ef1aaa 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: arithmetic_5da_alt_02 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml index 8932c906..1941ff65 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_02/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_02_yaml task: 
arithmetic_5ds_alt_02 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml index 7e9862e9..275559d1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/_template_03_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml index 1dbf243f..c68279f4 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_1dc_alt_03 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml index c2d8b41a..0ef30ee7 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_2da_alt_03 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml index 4e7a986e..f742d37b 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_2dm_alt_03 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml index 626d1d93..59a85bfb 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_2ds.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_2ds_alt_03 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml index 4dac4f85..d4e4d772 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_3da_alt_03 dataset_name: arithmetic_3da -task_alias: 3da \ No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml index b8ccc17f..7022a855 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_3ds_alt_03 dataset_name: arithmetic_3ds -task_alias: 3ds \ No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml index b4312343..0d70f481 100644 --- 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_4da_alt_03 dataset_name: arithmetic_4da -task_alias: 4da \ No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml index 6a6e173c..77cd5c65 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_4ds_alt_03 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml index 6fff06bc..02aec1d1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_5da_alt_03 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml index 0d52562f..6bbbd3a1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_03/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_03_yaml task: arithmetic_5ds_alt_03 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml index b21fa53c..6680afad 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/_template_04_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml index b9dd9b44..8d940e1c 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_1dc_alt_04 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml index 3a45ade3..0627fe5e 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_2da_alt_04 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml index d2ef1777..667259c8 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_2dm_alt_04 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git 
a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml index 9bd31b5e..fc45ee63 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_2ds.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_2ds_alt_04 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml index c72f5526..d1468745 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_3da_alt_04 dataset_name: arithmetic_3da -task_alias: 3da \ No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml index 94cf4666..d67daf13 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_3ds_alt_04 dataset_name: arithmetic_3ds -task_alias: 3ds \ No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml index 00031d76..339c47f4 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_4da_alt_04 dataset_name: arithmetic_4da -task_alias: 4da \ 
No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml index 95dd6f2d..c983c896 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_4ds_alt_04 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml index 6a667d33..7e9cbf0f 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_5da_alt_04 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml index afa3996f..dca4bf98 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_04/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_04_yaml task: arithmetic_5ds_alt_04 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml index c6c06737..f838a10e 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/_template_05_yaml @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true - 
metric: brier_score - higher_is_better: false \ No newline at end of file + higher_is_better: false diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml index 3b98b8c7..2c0f4cec 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_1dc.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_1dc_alt_05 dataset_name: arithmetic_1dc -task_alias: 1dc \ No newline at end of file +task_alias: 1dc diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml index e53560b3..94632d23 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2da.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_2da_alt_05 dataset_name: arithmetic_2da -task_alias: 2da \ No newline at end of file +task_alias: 2da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml index ab0be4b8..df44aa68 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2dm.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_2dm_alt_05 dataset_name: arithmetic_2dm -task_alias: 2dm \ No newline at end of file +task_alias: 2dm diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml index e95734e5..d34f87d1 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_2ds.yaml @@ -1,4 
+1,4 @@ include: _template_05_yaml task: arithmetic_2ds_alt_05 dataset_name: arithmetic_2ds -task_alias: 2ds \ No newline at end of file +task_alias: 2ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml index 3052c7dc..5bbd6b9e 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3da.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_3da_alt_05 dataset_name: arithmetic_3da -task_alias: 3da \ No newline at end of file +task_alias: 3da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml index 050dae04..32d15f72 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_3ds.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_3ds_alt_05 dataset_name: arithmetic_3ds -task_alias: 3ds \ No newline at end of file +task_alias: 3ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml index 5d764d26..09b7fda5 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4da.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_4da_alt_05 dataset_name: arithmetic_4da -task_alias: 4da \ No newline at end of file +task_alias: 4da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml index 3915be0e..2d4e4c49 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml +++ 
b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_4ds.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_4ds_alt_05 dataset_name: arithmetic_4ds -task_alias: 4ds \ No newline at end of file +task_alias: 4ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml index 9ede053c..12b88bc3 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5da.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_5da_alt_05 dataset_name: arithmetic_5da -task_alias: 5da \ No newline at end of file +task_alias: 5da diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml index 7841b717..406926a2 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml +++ b/lm_eval/tasks/arithmetic/alternative_worlds/style_05/arithmetic_5ds.yaml @@ -1,4 +1,4 @@ include: _template_05_yaml task: arithmetic_5ds_alt_05 dataset_name: arithmetic_5ds -task_alias: 5ds \ No newline at end of file +task_alias: 5ds diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py index a109e492..6f2e69c0 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py +++ b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py @@ -7,27 +7,34 @@ def style_00(docs): # What is (9 + 8) * 2? return docs["context"] + def style_01(docs): # What is (9 + 8) * 2? return docs["context"].replace("Question: ", "").replace(" Answer:", "") + def style_02(docs): # Q: What is (9 + 8) * 2? A: return docs["context"].replace("Question: ", "Q: ").replace(" Answer:", " A:") + def style_03(docs): # Solve (9 + 8) * 2. 
- return docs["context"].replace("Question: What is", "Solve").replace(" Answer:", ".") + return ( + docs["context"].replace("Question: What is", "Solve").replace(" Answer:", ".") + ) + def style_04(docs): # (9 + 8) * 2 = return docs["context"].replace("Question: What is ", "").replace(" Answer:", " =") + def style_05(docs): # What is (9 + 8) * 2? Answer: - return docs["context"].replace("Question: ", "") \ No newline at end of file + return docs["context"].replace("Question: ", "") diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/README.md b/lm_eval/tasks/hellaswag/alternative_worlds/README.md index a9f58e69..93600ae1 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/README.md +++ b/lm_eval/tasks/hellaswag/alternative_worlds/README.md @@ -15,6 +15,6 @@ Answer types: - original option - just letter - letters + continuation - - original option + - original option - just letter -- continuation \ No newline at end of file +- continuation diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml index dc363efa..db54afc0 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_01 -group_alias: style_01 +group_alias: style_01 task: hellaswag_01a task_alias: a doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01a \ No newline at end of file +doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml index 8049df97..7c5890ca 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_01 -group_alias: style_01 +group_alias: style_01 task: 
hellaswag_01b task_alias: b doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01b \ No newline at end of file +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml index 62ed4949..2e7d61eb 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_01/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_01 -group_alias: style_01 +group_alias: style_01 task: hellaswag_01c task_alias: c doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01c \ No newline at end of file +doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml index 328be078..bae1a7c4 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_02 -group_alias: style_02 +group_alias: style_02 task: hellaswag_02a task_alias: a doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02a \ No newline at end of file +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml index 73d01b56..b808d99b 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_02 -group_alias: style_02 +group_alias: style_02 task: hellaswag_02b task_alias: b doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02b \ No newline at end of file +doc_to_choice: 
!function ../styles.choice_02b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml index 32a8d8d5..3fff20f2 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_02/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_02 -group_alias: style_02 +group_alias: style_02 task: hellaswag_02c task_alias: c doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02c \ No newline at end of file +doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml index 407b84de..9b87ae71 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_03 -group_alias: style_03 +group_alias: style_03 task: hellaswag_03a task_alias: a doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03a \ No newline at end of file +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml index ef1f6127..1c93ffb3 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_03 -group_alias: style_03 +group_alias: style_03 task: hellaswag_03b task_alias: b doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03b \ No newline at end of file +doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml index 
1e7edecb..f33ccb84 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_03/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_03 -group_alias: style_03 +group_alias: style_03 task: hellaswag_03c task_alias: c doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03c \ No newline at end of file +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml index 56cef2bf..bb5bd68f 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_04 -group_alias: style_04 +group_alias: style_04 task: hellaswag_04a task_alias: a doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04a \ No newline at end of file +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml index 04bb9397..7a1e6156 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_04 -group_alias: style_04 +group_alias: style_04 task: hellaswag_04b task_alias: b doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04b \ No newline at end of file +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml index aee06df7..0a30c7ab 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_04/c.yaml @@ -1,7 +1,7 
@@ include: ../_hellaswag_alt_yaml group: hellaswag_04 -group_alias: style_04 +group_alias: style_04 task: hellaswag_04c task_alias: c doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04c \ No newline at end of file +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml index d7709b4e..cd7b773b 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_05 -group_alias: style_05 +group_alias: style_05 task: hellaswag_05a task_alias: a doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05a \ No newline at end of file +doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml index a0ed5a99..4cbce6bd 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_05 -group_alias: style_05 +group_alias: style_05 task: hellaswag_05b task_alias: b doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05b \ No newline at end of file +doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml index da95c1e1..ca7e8a21 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_05/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_05 -group_alias: style_05 +group_alias: style_05 task: hellaswag_05c task_alias: c doc_to_text: !function 
../styles.template_05 -doc_to_choice: !function ../styles.choice_05c \ No newline at end of file +doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml index fd438dbf..95a007b0 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_06 -group_alias: style_06 +group_alias: style_06 task: hellaswag_06a task_alias: a doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06a \ No newline at end of file +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml index e7bf0051..3df5d942 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_06 -group_alias: style_06 +group_alias: style_06 task: hellaswag_06b task_alias: b doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06b \ No newline at end of file +doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml index a93af76b..a690f6e7 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_06/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_06 -group_alias: style_06 +group_alias: style_06 task: hellaswag_06c task_alias: c doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06c \ No newline at end of file +doc_to_choice: !function ../styles.choice_06c diff --git 
a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml index 391cad9c..5d3d7329 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_07 -group_alias: style_07 +group_alias: style_07 task: hellaswag_07a task_alias: a doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07a \ No newline at end of file +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml index d4547f29..3764e9e5 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_07 -group_alias: style_07 +group_alias: style_07 task: hellaswag_07b task_alias: b doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07b \ No newline at end of file +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml index 223e60e2..9be6f58c 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_07/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_07 -group_alias: style_07 +group_alias: style_07 task: hellaswag_07c task_alias: c doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07c \ No newline at end of file +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml index 6fc8768f..d7d5e465 100644 --- 
a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/a.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_08 -group_alias: style_08 +group_alias: style_08 task: hellaswag_08a task_alias: a doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08a \ No newline at end of file +doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml index dd8b9762..7efccdd1 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/b.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_08 -group_alias: style_08 +group_alias: style_08 task: hellaswag_08b task_alias: b doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08b \ No newline at end of file +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml index d92a8465..4642a3c4 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml +++ b/lm_eval/tasks/hellaswag/alternative_worlds/style_08/c.yaml @@ -1,7 +1,7 @@ include: ../_hellaswag_alt_yaml group: hellaswag_08 -group_alias: style_08 +group_alias: style_08 task: hellaswag_08c task_alias: c doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08c \ No newline at end of file +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/hellaswag/alternative_worlds/styles.py b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py index 204465fb..4aa8c268 100644 --- a/lm_eval/tasks/hellaswag/alternative_worlds/styles.py +++ b/lm_eval/tasks/hellaswag/alternative_worlds/styles.py @@ -1,6 +1,7 @@ import string from functools import partial + def 
doc_to_text_base(alphabet, style, doc): choices = doc["choices"] @@ -13,20 +14,22 @@ def doc_to_text_base(alphabet, style, doc): else: choice_string = "{} {}" - doc_to_text = "\n\n".join([ - doc["query"]+"...", - " What is the most appropriate continuation?", - ] + [ - choice_string.format(i,j) for i,j in zip(letter_list, choices) + doc_to_text = "\n\n".join( + [ + doc["query"] + "...", + " What is the most appropriate continuation?", ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] ) return doc_to_text + # Full continuation def choice_A(doc): return doc["choices"] + # Letters only def choice_B(alphabet, style, doc): @@ -35,10 +38,11 @@ def choice_B(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" in style: - letter_list = [letter.replace("\t","") for letter in letter_list] + letter_list = [letter.replace("\t", "") for letter in letter_list] return letter_list + # Letters + Full continuation def choice_C(alphabet, style, doc): @@ -47,9 +51,10 @@ def choice_C(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" not in style: - letter_list = [letter+" " for letter in letter_list] + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, doc["choices"])] - return [letter+choice for letter, choice in zip(letter_list, doc["choices"])] template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") choice_01a = choice_A @@ -83,5 +88,3 @@ template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") choice_08a = choice_A choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") - - diff --git a/lm_eval/tasks/mathqa/alternative_worlds/README.md b/lm_eval/tasks/mathqa/alternative_worlds/README.md index a9f58e69..93600ae1 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/README.md +++ 
b/lm_eval/tasks/mathqa/alternative_worlds/README.md @@ -15,6 +15,6 @@ Answer types: - original option - just letter - letters + continuation - - original option + - original option - just letter -- continuation \ No newline at end of file +- continuation diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml index a0d9d97e..ba88e90f 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_01 -group_alias: style_01 +group_alias: style_01 task: mathqa_01a task_alias: a doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01a \ No newline at end of file +doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml index 7dfc3c7a..879dc769 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_01 -group_alias: style_01 +group_alias: style_01 task: mathqa_01b task_alias: b doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01b \ No newline at end of file +doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml index d2e34371..b05afe50 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_01 -group_alias: style_01 +group_alias: style_01 task: mathqa_01c task_alias: c doc_to_text: !function ../styles.template_01 -doc_to_choice: !function ../styles.choice_01c \ No newline at end of file +doc_to_choice: 
!function ../styles.choice_01c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml index 761dd38a..87899d82 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_02 -group_alias: style_02 +group_alias: style_02 task: mathqa_02a task_alias: a doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02a \ No newline at end of file +doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml index 04c89fb7..40dd7c76 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_02 -group_alias: style_02 +group_alias: style_02 task: mathqa_02b task_alias: b doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02b \ No newline at end of file +doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml index 8a64ecfb..ab981ac1 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_02 -group_alias: style_02 +group_alias: style_02 task: mathqa_02c task_alias: c doc_to_text: !function ../styles.template_02 -doc_to_choice: !function ../styles.choice_02c \ No newline at end of file +doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml index 07ab19e8..c1732de6 100644 --- 
a/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_03 -group_alias: style_03 +group_alias: style_03 task: mathqa_03a task_alias: a doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03a \ No newline at end of file +doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml index d4b36dd5..31b5583b 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_03 -group_alias: style_03 +group_alias: style_03 task: mathqa_03b task_alias: b doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03b \ No newline at end of file +doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml index af3db476..e57e3e2e 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_03 -group_alias: style_03 +group_alias: style_03 task: mathqa_03c task_alias: c doc_to_text: !function ../styles.template_03 -doc_to_choice: !function ../styles.choice_03c \ No newline at end of file +doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml index b2727c34..8123eae6 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_04 -group_alias: style_04 +group_alias: style_04 
task: mathqa_04a task_alias: a doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04a \ No newline at end of file +doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml index 0871ce9a..a76ee54c 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_04 -group_alias: style_04 +group_alias: style_04 task: mathqa_04b task_alias: b doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04b \ No newline at end of file +doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml index 28f8a443..2397b7e5 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_04 -group_alias: style_04 +group_alias: style_04 task: mathqa_04c task_alias: c doc_to_text: !function ../styles.template_04 -doc_to_choice: !function ../styles.choice_04c \ No newline at end of file +doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml index 83bff053..0ab6b84a 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_05 -group_alias: style_05 +group_alias: style_05 task: mathqa_05a task_alias: a doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05a \ No newline at end of file +doc_to_choice: !function ../styles.choice_05a diff --git 
a/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml index 1b412bf7..02b48057 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_05 -group_alias: style_05 +group_alias: style_05 task: mathqa_05b task_alias: b doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05b \ No newline at end of file +doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml index b49b6b2f..b8ac931f 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_05 -group_alias: style_05 +group_alias: style_05 task: mathqa_05c task_alias: c doc_to_text: !function ../styles.template_05 -doc_to_choice: !function ../styles.choice_05c \ No newline at end of file +doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml index b3a69d31..91980ebd 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_06 -group_alias: style_06 +group_alias: style_06 task: mathqa_06a task_alias: a doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06a \ No newline at end of file +doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml index e72dab31..82e4ee90 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml +++ 
b/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_06 -group_alias: style_06 +group_alias: style_06 task: mathqa_06b task_alias: b doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06b \ No newline at end of file +doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml index 9b4ecfda..ef7a9b03 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_06 -group_alias: style_06 +group_alias: style_06 task: mathqa_06c task_alias: c doc_to_text: !function ../styles.template_06 -doc_to_choice: !function ../styles.choice_06c \ No newline at end of file +doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml index 5b96ba17..430afd93 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_07 -group_alias: style_07 +group_alias: style_07 task: mathqa_07a task_alias: a doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07a \ No newline at end of file +doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml index 54713bfb..b86771f2 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_07 -group_alias: style_07 +group_alias: style_07 task: mathqa_07b task_alias: b doc_to_text: !function 
../styles.template_07 -doc_to_choice: !function ../styles.choice_07b \ No newline at end of file +doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml index 0d90b216..40deb212 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_07 -group_alias: style_07 +group_alias: style_07 task: mathqa_07c task_alias: c doc_to_text: !function ../styles.template_07 -doc_to_choice: !function ../styles.choice_07c \ No newline at end of file +doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml index dc092c2d..5321598e 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_08 -group_alias: style_08 +group_alias: style_08 task: mathqa_08a task_alias: a doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08a \ No newline at end of file +doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml index 1f51be56..135d41e5 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_08 -group_alias: style_08 +group_alias: style_08 task: mathqa_08b task_alias: b doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08b \ No newline at end of file +doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml 
b/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml index 3ea0434b..c00a30e2 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml @@ -1,7 +1,7 @@ include: ../_mathqa_alt_yaml group: mathqa_08 -group_alias: style_08 +group_alias: style_08 task: mathqa_08c task_alias: c doc_to_text: !function ../styles.template_08 -doc_to_choice: !function ../styles.choice_08c \ No newline at end of file +doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/styles.py b/lm_eval/tasks/mathqa/alternative_worlds/styles.py index 15c93d00..3b72cf05 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/styles.py +++ b/lm_eval/tasks/mathqa/alternative_worlds/styles.py @@ -2,6 +2,7 @@ import re import string from functools import partial + def parse_choices(doc): choices = [ c[4:].rstrip(" ,") @@ -9,6 +10,7 @@ def parse_choices(doc): ] return choices + def doc_to_text_base(alphabet, style, doc): choices = parse_choices(doc) @@ -22,17 +24,18 @@ def doc_to_text_base(alphabet, style, doc): else: choice_string = "{} {}" doc_to_text = "\n\n".join( - [doc["Problem"]] + [ - choice_string.format(i,j) for i,j in zip(letter_list, choices) - ] + [doc["Problem"]] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] ) return doc_to_text + # Full continuation def choice_A(doc): return parse_choices(doc) + # Letters only def choice_B(alphabet, style, doc): @@ -41,10 +44,11 @@ def choice_B(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" in style: - letter_list = [letter.replace("\t","") for letter in letter_list] + letter_list = [letter.replace("\t", "") for letter in letter_list] return letter_list + # Letters + Full continuation def choice_C(alphabet, style, doc): @@ -53,9 +57,10 @@ def choice_C(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" not in style: - 
letter_list = [letter+" " for letter in letter_list] + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] - return [letter+choice for letter, choice in zip(letter_list, choices)] template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") choice_01a = choice_A @@ -89,5 +94,3 @@ template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") choice_08a = choice_A choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") - - diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml index f22c500e..faca4f74 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml @@ -4,4 +4,4 @@ task: - mmlu_style_02_fc - mmlu_style_03_fc - mmlu_style_04_fc - - mmlu_style_05_fc \ No newline at end of file + - mmlu_style_05_fc diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml index 0de2da42..2e357184 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml @@ -4,4 +4,4 @@ task: - mmlu_style_02_lo - mmlu_style_03_lo - mmlu_style_04_lo - - mmlu_style_05_lo \ No newline at end of file + - mmlu_style_05_lo diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml index 9b01f2d9..9f72c186 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml @@ -1,5 +1,5 @@ group: mmlu_style_01 -group_alias: style_01 +group_alias: style_01 task: mmlu_style_01a 
task_alias: a dataset_path: cais/mmlu diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml index 888b0f9c..f3cab9d6 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml @@ -1,5 +1,5 @@ group: mmlu_style_01 -group_alias: style_01 +group_alias: style_01 task: mmlu_style_01b task_alias: b dataset_path: cais/mmlu diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml index 04c85bb4..ab765e8b 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml @@ -1,5 +1,5 @@ group: mmlu_style_01 -group_alias: style_01 +group_alias: style_01 task: mmlu_style_01c task_alias: c dataset_path: cais/mmlu diff --git a/lm_eval/tasks/mmlu/alternative_worlds/styles.py b/lm_eval/tasks/mmlu/alternative_worlds/styles.py index 275503c6..b64d5f46 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/styles.py +++ b/lm_eval/tasks/mmlu/alternative_worlds/styles.py @@ -1,6 +1,7 @@ import string from functools import partial + def doc_to_text_base(alphabet, style, doc): choices = doc["choices"]["text"] @@ -13,19 +14,21 @@ def doc_to_text_base(alphabet, style, doc): else: choice_string = "{} {}" - doc_to_text = "\n\n".join([ - "Question: "+doc["question"].strip()+"\nAnswer:", - ] + [ - choice_string.format(i,j) for i,j in zip(letter_list, choices) + doc_to_text = "\n\n".join( + [ + "Question: " + doc["question"].strip() + "\nAnswer:", ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] ) return doc_to_text + # Full continuation def choice_A(doc): return doc["choices"]["text"] + # Letters only def choice_B(alphabet, style, doc): @@ -34,10 +37,11 @@ def choice_B(alphabet, style, doc): letter_list = [style.format(letter) for 
letter in alphabet[0:num]] if "\t" in style: - letter_list = [letter.replace("\t","") for letter in letter_list] + letter_list = [letter.replace("\t", "") for letter in letter_list] return letter_list + # Letters + Full continuation def choice_C(alphabet, style, doc): @@ -46,9 +50,10 @@ def choice_C(alphabet, style, doc): letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" not in style: - letter_list = [letter+" " for letter in letter_list] + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] - return [letter+choice for letter, choice in zip(letter_list, choices)] template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") choice_01a = choice_A @@ -82,5 +87,3 @@ template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") choice_08a = choice_A choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") - - -- GitLab From 55eff8898b4a2111f1b08feb9c906b34b62543cf Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 8 Dec 2023 03:34:51 +0000 Subject: [PATCH 22/50] made prompt and output variations --- .../arc/alternative_worlds/arc_easy_alt.yaml | 10 -------- .../{ => output_variation}/_arc_easy_alt_yaml | 0 .../output_variation/arc_easy_alt.yaml | 10 ++++++++ .../{ => output_variation}/style_01/a.yaml | 7 +++--- .../{ => output_variation}/style_01/b.yaml | 7 +++--- .../{ => output_variation}/style_01/c.yaml | 7 +++--- .../{ => output_variation}/style_02/a.yaml | 7 +++--- .../{ => output_variation}/style_02/b.yaml | 7 +++--- .../{ => output_variation}/style_02/c.yaml | 7 +++--- .../{ => output_variation}/style_03/a.yaml | 7 +++--- .../{ => output_variation}/style_03/b.yaml | 7 +++--- .../{ => output_variation}/style_03/c.yaml | 7 +++--- .../{ => output_variation}/style_04/a.yaml | 7 +++--- .../{ => output_variation}/style_04/b.yaml | 7 +++--- .../{ => 
output_variation}/style_04/c.yaml | 7 +++--- .../{ => output_variation}/style_05/a.yaml | 7 +++--- .../{ => output_variation}/style_05/b.yaml | 7 +++--- .../{ => output_variation}/style_05/c.yaml | 7 +++--- .../{ => output_variation}/style_06/a.yaml | 7 +++--- .../{ => output_variation}/style_06/b.yaml | 7 +++--- .../{ => output_variation}/style_06/c.yaml | 7 +++--- .../{ => output_variation}/style_07/a.yaml | 7 +++--- .../{ => output_variation}/style_07/b.yaml | 7 +++--- .../{ => output_variation}/style_07/c.yaml | 7 +++--- .../{ => output_variation}/style_08/a.yaml | 7 +++--- .../{ => output_variation}/style_08/b.yaml | 7 +++--- .../{ => output_variation}/style_08/c.yaml | 7 +++--- .../{ => output_variation}/styles.py | 5 ++-- .../prompt_variation/_arc_easy_alt_yaml | 24 +++++++++++++++++++ .../prompt_variation/style_01.yaml | 5 ++++ .../prompt_variation/style_02.yaml | 5 ++++ .../prompt_variation/style_03.yaml | 5 ++++ 32 files changed, 124 insertions(+), 108 deletions(-) delete mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_easy_alt.yaml rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/_arc_easy_alt_yaml (100%) create mode 100644 lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_01/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_01/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_01/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_02/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_02/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_02/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_03/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_03/b.yaml (51%) rename 
lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_03/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_04/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_04/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_04/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_05/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_05/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_05/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_06/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_06/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_06/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_07/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_07/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_07/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_08/a.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_08/b.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/style_08/c.yaml (51%) rename lm_eval/tasks/arc/alternative_worlds/{ => output_variation}/styles.py (96%) create mode 100644 lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy_alt.yaml deleted file mode 
100644 index a92991de..00000000 --- a/lm_eval/tasks/arc/alternative_worlds/arc_easy_alt.yaml +++ /dev/null @@ -1,10 +0,0 @@ -group: arc_easy_alt -task: - - arc_easy_01 - - arc_easy_02 - - arc_easy_03 - - arc_easy_04 - - arc_easy_05 - - arc_easy_06 - - arc_easy_07 - - arc_easy_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/_arc_easy_alt_yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/_arc_easy_alt_yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/_arc_easy_alt_yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml new file mode 100644 index 00000000..af4dcf97 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml @@ -0,0 +1,10 @@ +group: arc_easy_alt_ov +task: + - arc_easy_alt_ov_01 + - arc_easy_alt_ov_02 + - arc_easy_alt_ov_03 + - arc_easy_alt_ov_04 + - arc_easy_alt_ov_05 + - arc_easy_alt_ov_06 + - arc_easy_alt_ov_07 + - arc_easy_alt_ov_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/a.yaml index 2d95b888..2b5eabd0 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_01/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_01 -group_alias: style_01 -task: arc_easy_01a -task_alias: a +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01a doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 \ No newline at end of file diff --git 
a/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/b.yaml index 75fef77f..5a4be1cf 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_01/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_01 -group_alias: style_01 -task: arc_easy_01b -task_alias: b +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01b doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/c.yaml index 317233ac..a3f3e82f 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_01/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_01 -group_alias: style_01 -task: arc_easy_01c -task_alias: c +group: arc_easy_alt_ov_01 +task: arc_easy_alt_ov_01c doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/a.yaml index 7819b8bd..9c11dfc8 100644 --- 
a/lm_eval/tasks/arc/alternative_worlds/style_02/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_02 -group_alias: style_02 -task: arc_easy_02a -task_alias: a +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02a doc_to_text: !function ../styles.template_02 doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/b.yaml index 3223a773..68297643 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_02/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_02 -group_alias: style_02 -task: arc_easy_02b -task_alias: b +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02b doc_to_text: !function ../styles.template_02 doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/c.yaml index 6b128406..929ad3d9 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_02/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_02 -group_alias: style_02 -task: arc_easy_02c -task_alias: c +group: arc_easy_alt_ov_02 +task: arc_easy_alt_ov_02c doc_to_text: !function 
../styles.template_02 doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/a.yaml index 339f70d8..71e7e2fd 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_03 -group_alias: style_03 -task: arc_easy_03a -task_alias: a +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03a doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/b.yaml index e700f628..08e17d48 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_03 -group_alias: style_03 -task: arc_easy_03b -task_alias: b +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03b doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/c.yaml similarity index 51% rename from 
lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/c.yaml index 6f0feff8..471e93c9 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_03/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_03 -group_alias: style_03 -task: arc_easy_03c -task_alias: c +group: arc_easy_alt_ov_03 +task: arc_easy_alt_ov_03c doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/a.yaml index 99578be0..7c8512a7 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_04 -group_alias: style_04 -task: arc_easy_04a -task_alias: a +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04a doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/b.yaml index 81edfbc4..bceb21df 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/b.yaml @@ -1,7 +1,6 @@ include: 
../_arc_easy_alt_yaml -group: arc_easy_04 -group_alias: style_04 -task: arc_easy_04b -task_alias: b +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04b doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/c.yaml index f970376a..14c4908d 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_04/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_04 -group_alias: style_04 -task: arc_easy_04c -task_alias: c +group: arc_easy_alt_ov_04 +task: arc_easy_alt_ov_04c doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/a.yaml index 00cd7aaa..31b9fb0c 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_05 -group_alias: style_05 -task: arc_easy_05a -task_alias: a +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05a doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 \ No newline at end of file 
diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/b.yaml index 253e38e5..ac50290c 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_05 -group_alias: style_05 -task: arc_easy_05b -task_alias: b +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05b doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/c.yaml index 6066248b..bbe63c52 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_05/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_05 -group_alias: style_05 -task: arc_easy_05c -task_alias: c +group: arc_easy_alt_ov_05 +task: arc_easy_alt_ov_05c doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/a.yaml index 1ddeb4f5..5bd7deb0 100644 
--- a/lm_eval/tasks/arc/alternative_worlds/style_06/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_06 -group_alias: style_06 -task: arc_easy_06a -task_alias: a +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06a doc_to_text: !function ../styles.template_06 doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/b.yaml index 4ef19810..5317195e 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_06/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_06 -group_alias: style_06 -task: arc_easy_06b -task_alias: b +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06b doc_to_text: !function ../styles.template_06 doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/c.yaml index 0ce0c2da..4632e663 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_06/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_06 -group_alias: style_06 -task: arc_easy_06c -task_alias: c +group: arc_easy_alt_ov_06 +task: arc_easy_alt_ov_06c doc_to_text: 
!function ../styles.template_06 doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/a.yaml index 9229f190..27dd313d 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_07 -group_alias: style_07 -task: arc_easy_07a -task_alias: a +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07a doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/b.yaml index 85d8017e..de286458 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/b.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_07 -group_alias: style_07 -task: arc_easy_07b -task_alias: b +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07b doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/c.yaml similarity index 51% rename 
from lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/c.yaml index 9eb8ba53..70219ff9 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_07/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_07 -group_alias: style_07 -task: arc_easy_07c -task_alias: c +group: arc_easy_alt_ov_07 +task: arc_easy_alt_ov_07c doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/a.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/a.yaml index 37dcd2a7..ea5f73ee 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/a.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/a.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_08 -group_alias: style_08 -task: arc_easy_08a -task_alias: a +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08a doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/b.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/b.yaml index 0a7df350..503f7cf1 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/b.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/b.yaml @@ -1,7 +1,6 @@ include: 
../_arc_easy_alt_yaml -group: arc_easy_08 -group_alias: style_08 -task: arc_easy_08b -task_alias: b +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08b doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/c.yaml similarity index 51% rename from lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/c.yaml index c94c41f4..8507c6c0 100644 --- a/lm_eval/tasks/arc/alternative_worlds/style_08/c.yaml +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/c.yaml @@ -1,7 +1,6 @@ include: ../_arc_easy_alt_yaml -group: arc_easy_08 -group_alias: style_08 -task: arc_easy_08c -task_alias: c +group: arc_easy_alt_ov_08 +task: arc_easy_alt_ov_08c doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/styles.py b/lm_eval/tasks/arc/alternative_worlds/output_variation/styles.py similarity index 96% rename from lm_eval/tasks/arc/alternative_worlds/styles.py rename to lm_eval/tasks/arc/alternative_worlds/output_variation/styles.py index 3d075517..a4e61348 100644 --- a/lm_eval/tasks/arc/alternative_worlds/styles.py +++ b/lm_eval/tasks/arc/alternative_worlds/output_variation/styles.py @@ -14,11 +14,12 @@ def doc_to_text_base(alphabet, style, doc): else: choice_string = "{} {}" - doc_to_text = "\n\n".join( + doc_to_text = "\n".join( [ - "Question: " + doc["question"] + "\nAnswer:", + "Question: " + doc["question"], ] + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] ) return doc_to_text diff --git 
a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml new file mode 100644 index 00000000..633826a2 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml @@ -0,0 +1,24 @@ +group: + - ai2_arc +task: arc_easy +dataset_path: ai2_arc +dataset_name: ARC-Easy +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 00000000..614f3970 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_01 +doc_to_text: "{{question}}" +doc_to_decontamination_query: "{{question}}" diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..1fb9ae05 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_02 +doc_to_text: "Q: {{question}}\nA:" +doc_to_decontamination_query: "Q: {{question}}\nA:" \ No newline at end of file diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml 
b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..5006f856 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _arc_easy_alt_yaml +group: arc_easy_alt_pv +task: arc_easy_alt_pv_03 +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" \ No newline at end of file -- GitLab From 66421b57e5b7eb404a1dd00301c4dc620b16b67a Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 8 Dec 2023 03:35:13 +0000 Subject: [PATCH 23/50] add prompt variation --- .../tasks/bbh/alternative_worlds/README.md | 34 ++++++++++ .../prompt_variation/bbh_alt_pv_zeroshot.yaml | 5 ++ .../style_01/zeroshot/_zeroshot_template_yaml | 12 ++++ .../zeroshot/boolean_expressions.yaml | 6 ++ .../style_01/zeroshot/causal_judgement.yaml | 4 ++ .../style_01/zeroshot/date_understanding.yaml | 4 ++ .../style_01/zeroshot/disambiguation_qa.yaml | 4 ++ .../style_01/zeroshot/formal_fallacies.yaml | 6 ++ .../style_01/zeroshot/geometric_shapes.yaml | 4 ++ .../style_01/zeroshot/hyperbaton.yaml | 4 ++ .../logical_deduction_five_objects.yaml | 4 ++ .../logical_deduction_seven_objects.yaml | 4 ++ .../logical_deduction_three_objects.yaml | 4 ++ .../zeroshot/movie_recommendation.yaml | 5 ++ .../style_01/zeroshot/navigate.yaml | 4 ++ .../zeroshot/penguins_in_a_table.yaml | 4 ++ .../reasoning_about_colored_objects.yaml | 4 ++ .../style_01/zeroshot/ruin_names.yaml | 5 ++ .../salient_translation_error_detection.yaml | 4 ++ .../style_01/zeroshot/snarks.yaml | 4 ++ .../zeroshot/sports_understanding.yaml | 6 ++ .../style_01/zeroshot/temporal_sequences.yaml | 4 ++ ...racking_shuffled_objects_five_objects.yaml | 4 ++ ...acking_shuffled_objects_seven_objects.yaml | 4 ++ ...acking_shuffled_objects_three_objects.yaml | 4 ++ .../style_01/zeroshot/web_of_lies.yaml | 6 ++ .../style_02/zeroshot/_zeroshot_template_yaml | 12 ++++ 
.../zeroshot/boolean_expressions.yaml | 6 ++ .../style_02/zeroshot/causal_judgement.yaml | 4 ++ .../style_02/zeroshot/date_understanding.yaml | 4 ++ .../style_02/zeroshot/disambiguation_qa.yaml | 4 ++ .../style_02/zeroshot/formal_fallacies.yaml | 6 ++ .../style_02/zeroshot/geometric_shapes.yaml | 4 ++ .../style_02/zeroshot/hyperbaton.yaml | 4 ++ .../logical_deduction_five_objects.yaml | 4 ++ .../logical_deduction_seven_objects.yaml | 4 ++ .../logical_deduction_three_objects.yaml | 4 ++ .../zeroshot/movie_recommendation.yaml | 5 ++ .../style_02/zeroshot/navigate.yaml | 4 ++ .../zeroshot/penguins_in_a_table.yaml | 4 ++ .../reasoning_about_colored_objects.yaml | 4 ++ .../style_02/zeroshot/ruin_names.yaml | 5 ++ .../salient_translation_error_detection.yaml | 4 ++ .../style_02/zeroshot/snarks.yaml | 4 ++ .../zeroshot/sports_understanding.yaml | 6 ++ .../style_02/zeroshot/temporal_sequences.yaml | 4 ++ ...racking_shuffled_objects_five_objects.yaml | 4 ++ ...acking_shuffled_objects_seven_objects.yaml | 4 ++ ...acking_shuffled_objects_three_objects.yaml | 4 ++ .../style_02/zeroshot/web_of_lies.yaml | 6 ++ .../style_03/zeroshot/_zeroshot_template_yaml | 12 ++++ .../zeroshot/boolean_expressions.yaml | 6 ++ .../style_03/zeroshot/causal_judgement.yaml | 4 ++ .../style_03/zeroshot/date_understanding.yaml | 4 ++ .../style_03/zeroshot/disambiguation_qa.yaml | 4 ++ .../style_03/zeroshot/formal_fallacies.yaml | 6 ++ .../style_03/zeroshot/geometric_shapes.yaml | 4 ++ .../style_03/zeroshot/hyperbaton.yaml | 4 ++ .../logical_deduction_five_objects.yaml | 4 ++ .../logical_deduction_seven_objects.yaml | 4 ++ .../logical_deduction_three_objects.yaml | 4 ++ .../zeroshot/movie_recommendation.yaml | 5 ++ .../style_03/zeroshot/navigate.yaml | 4 ++ .../zeroshot/penguins_in_a_table.yaml | 4 ++ .../reasoning_about_colored_objects.yaml | 4 ++ .../style_03/zeroshot/ruin_names.yaml | 5 ++ .../salient_translation_error_detection.yaml | 4 ++ .../style_03/zeroshot/snarks.yaml | 4 ++ 
.../zeroshot/sports_understanding.yaml | 6 ++ .../style_03/zeroshot/temporal_sequences.yaml | 4 ++ ...racking_shuffled_objects_five_objects.yaml | 4 ++ ...acking_shuffled_objects_seven_objects.yaml | 4 ++ ...acking_shuffled_objects_three_objects.yaml | 4 ++ .../style_03/zeroshot/web_of_lies.yaml | 6 ++ .../prompt_variation/styles.py | 64 +++++++++++++++++++ .../prompt_variation/utils.py | 25 ++++++++ 76 files changed, 470 insertions(+) create mode 100644 lm_eval/tasks/bbh/alternative_worlds/README.md create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml create mode 100644 
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml 
create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml create mode 100644 
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml create mode 100644 
lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py create mode 100644 lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py diff --git a/lm_eval/tasks/bbh/alternative_worlds/README.md b/lm_eval/tasks/bbh/alternative_worlds/README.md new file mode 100644 index 00000000..2c0a422d --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/README.md @@ -0,0 +1,34 @@ +| Task | Prompt Variation | Output Variation | Option in Sample | +| :-----------------:| :---------------: | :---------------: |:---------------: | +| boolean_expressions | 
Yes | Yes | No | +| causal_judgement | Yes | Yes | Yes | +| date_understanding | Yes | Yes | Yes | +| disambiguation_qa | Yes | Yes | Yes | +| dyck_languages | Yes | No | No | +| formal_fallacies | Yes | Yes | Yes | +| geometric_shapes | Yes | Yes | Yes | +| hyperbaton | Yes | Yes | Yes | +| logical_deduction_five_objects| Yes | Yes | Yes | +| logical_deduction_seven_objects| Yes | Yes | Yes | +| logical_deduction_three_objects| Yes | Yes | Yes | +| movie_recommendation| Yes | Yes | Yes | +| multistep_arithmetic_two| Yes | No | No | +| navigate | Yes | Yes | Yes | +| object_counting | Yes | No | No | +| penguins_in_a_table| Yes | Yes | Yes | +| reasoning_about_colored_objects| Yes | Yes | Yes | +| ruin_names | Yes | Yes | Yes | +| salient_translation_error_detection| Yes| Yes | Yes | +| snarks | Yes | Yes | Yes | +| sports_understanding| Yes | Yes | No | +| temporal_sequences | Yes | Yes | Yes | +| tracking_shuffled_objects_five_objects| Yes| Yes | Yes | +| tracking_shuffled_objects_seven_objects| Yes| Yes | Yes | +| tracking_shuffled_objects_three_objects| Yes| Yes | Yes | +| web_of_lies | Yes | Yes | No | +| word_sorting | Yes | No | No | + + +Notes: +- `web_of_lies` already starts with `Question: ` +- Tasks with options are `Options: (A) ...` (multiple choice) or `Options: - ...` (binary choice) \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml new file mode 100644 index 00000000..023037f6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/bbh_alt_pv_zeroshot.yaml @@ -0,0 +1,5 @@ +group: bbh_alt_pv_zeroshot +task: + - bbh_alt_pv_01_zeroshot + - bbh_alt_pv_02_zeroshot + - bbh_alt_pv_03_zeroshot diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml new file mode 100644 index 00000000..73cecaa6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_01_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_01 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml new file mode 100644 index 00000000..aae4bfc1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/boolean_expressions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml new file mode 100644 index 00000000..9dc5dfc3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml new file mode 100644 index 00000000..9cb5eef6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml new file mode 100644 index 00000000..df69dd71 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": "disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml new file mode 100644 index 00000000..c993752a --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml new file mode 100644 index 00000000..0a5d175e --- /dev/null +++ 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml new file mode 100644 index 00000000..c51a7b93 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" +"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 00000000..e9704906 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 00000000..c33460c9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 00000000..2654f3a8 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml new file mode 100644 index 00000000..af71a329 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_movie_recommendation" +"process_docs": !function ../../utils.fix_movie_recommendation \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml new file mode 100644 index 00000000..ebd0a5b3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": 
"navigate" +"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml new file mode 100644 index 00000000..490e45f6 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": "penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 00000000..edbbb092 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml new file mode 100644 index 00000000..c04e5cd2 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" +"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" 
+"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_ruin_names" +"process_docs": !function ../../utils.fix_ruin_names \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 00000000..40980d83 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml new file mode 100644 index 00000000..fe58da5d --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_snarks" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml new file mode 100644 index 00000000..973efa98 --- /dev/null +++ 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml new file mode 100644 index 00000000..60a5069d --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 00000000..4df1d2fe --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_five_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 00000000..7c710c67 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 00000000..7ca47942 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml new file mode 100644 index 00000000..5b2fa7f8 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_01/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_01_zeroshot_web_of_lies" 
+"doc_to_target": target +"doc_to_choice": ["Yes", "No"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml new file mode 100644 index 00000000..b8c5f1e2 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_02_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_02 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml new file mode 100644 index 00000000..383566b1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/boolean_expressions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml new file mode 100644 index 00000000..5eba635d --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": 
"_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml new file mode 100644 index 00000000..3e5a3e93 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml new file mode 100644 index 00000000..a6d8c345 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": "disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml new file mode 100644 index 00000000..50f6425c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] \ No newline at end of file diff --git 
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml new file mode 100644 index 00000000..228567bb --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml new file mode 100644 index 00000000..f79fa0ce --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" +"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 00000000..bfdfdace --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 00000000..bcca9b80 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 00000000..327eea74 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml new file mode 100644 index 00000000..ba41418e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_movie_recommendation" +"process_docs": !function ../../utils.fix_movie_recommendation \ No newline at end of file diff --git
a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml new file mode 100644 index 00000000..d6442102 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": "navigate" +"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml new file mode 100644 index 00000000..186526da --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": "penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 00000000..25123984 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml new file mode 100644 index 00000000..aa1eb14b --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" +"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_ruin_names" +"process_docs": !function ../../utils.fix_ruin_names \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 00000000..85d8fb99 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml new file mode 100644 index 00000000..9889128f --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or
witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_snarks" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml new file mode 100644 index 00000000..05d88ac1 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml new file mode 100644 index 00000000..74b748f5 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 00000000..d9c00352 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_five_objects" +"description": "A task requiring determining 
the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 00000000..89a5b785 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 00000000..6cb6b89a --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml new file mode 
100644 index 00000000..893a7e24 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_02_zeroshot_web_of_lies" +"doc_to_target": target +"doc_to_choice": ["Yes", "No"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml new file mode 100644 index 00000000..369c5ecc --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/_zeroshot_template_yaml @@ -0,0 +1,12 @@ +group: bbh_alt_pv_03_zeroshot +dataset_path: lukaemon/bbh +output_type: multiple_choice +test_split: test +doc_to_text: !function ../../styles.styles_03 +doc_to_target: !function ../../styles.doc_to_target +doc_to_choice: !function ../../styles.doc_to_choice +num_fewshot: 0 +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml new file mode 100644 index 00000000..27e9b1a4 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/boolean_expressions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "boolean_expressions" +"description": "Evaluate the result of a random Boolean expression.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_boolean_expressions" +"doc_to_target": target +"doc_to_choice": ["True", "False"] diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml new file mode 100644 index 00000000..824ec01c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/causal_judgement.yaml @@ -0,0 +1,4 @@ +"dataset_name": "causal_judgement" +"description": "Answer questions about causal attribution.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_causal_judgement" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml new file mode 100644 index 00000000..8a93686c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/date_understanding.yaml @@ -0,0 +1,4 @@ +"dataset_name": "date_understanding" +"description": "Infer the date from context.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_date_understanding" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml new file mode 100644 index 00000000..bc4324e4 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/disambiguation_qa.yaml @@ -0,0 +1,4 @@ +"dataset_name": "disambiguation_qa" +"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_disambiguation_qa" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml new file mode 100644 index 00000000..9aa3f9cf --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/formal_fallacies.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "formal_fallacies" +"description": "Distinguish deductively valid arguments from formal fallacies.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_formal_fallacies" +"doc_to_target": target +"doc_to_choice": ["valid", "invalid"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml new file mode 100644 index 00000000..719db41e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/geometric_shapes.yaml @@ -0,0 +1,4 @@ +"dataset_name": "geometric_shapes" +"description": "Name geometric shapes from their SVG paths.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_geometric_shapes" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml new file mode 100644 index 00000000..7905a343 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/hyperbaton.yaml @@ -0,0 +1,4 @@ +"dataset_name": "hyperbaton" +"description": "Order adjectives correctly in English sentences.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_hyperbaton" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml new file mode 100644 index 00000000..14566a02 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_five_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" 
+"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml new file mode 100644 index 00000000..51ba44c8 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_seven_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml new file mode 100644 index 00000000..bca7b49c --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/logical_deduction_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "logical_deduction_three_objects" +"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_logical_deduction_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml new file mode 100644 index 00000000..b7603fa7 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml @@ -0,0 +1,5 @@ +"dataset_name": "movie_recommendation" +"description": "Recommend movies similar to the given list of movies.\n\n" +"include": 
"_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_movie_recommendation" +"process_docs": !function ../utils.fix_movie_recommendation \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml new file mode 100644 index 00000000..393e364e --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/navigate.yaml @@ -0,0 +1,4 @@ +"dataset_name": "navigate" +"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_navigate" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml new file mode 100644 index 00000000..71adb34b --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/penguins_in_a_table.yaml @@ -0,0 +1,4 @@ +"dataset_name": "penguins_in_a_table" +"description": "Answer questions about a table of penguins and their attributes.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_penguins_in_a_table" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml new file mode 100644 index 00000000..e1ddd9a4 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/reasoning_about_colored_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "reasoning_about_colored_objects" +"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n" +"include": "_zeroshot_template_yaml" +"task": 
"bbh_alt_pv_03_zeroshot_reasoning_about_colored_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml new file mode 100644 index 00000000..c4d741f3 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml @@ -0,0 +1,5 @@ +"dataset_name": "ruin_names" +"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_ruin_names" +"process_docs": !function utils.fix_ruin_names \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml new file mode 100644 index 00000000..86db045b --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/salient_translation_error_detection.yaml @@ -0,0 +1,4 @@ +"dataset_name": "salient_translation_error_detection" +"description": "Detect the type of error in an English translation of a German source sentence.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_salient_translation_error_detection" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml new file mode 100644 index 00000000..d8f2ca99 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/snarks.yaml @@ -0,0 +1,4 @@ +"dataset_name": "snarks" +"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt 
someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_snarks" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml new file mode 100644 index 00000000..99769206 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/sports_understanding.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sports_understanding" +"description": "Determine whether an artificially constructed sentence relating to sports is plausible or not.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_sports_understanding" +"doc_to_target": target +"doc_to_choice": ["yes", "no"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml new file mode 100644 index 00000000..bafde6b7 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/temporal_sequences.yaml @@ -0,0 +1,4 @@ +"dataset_name": "temporal_sequences" +"description": "Task description: Answer questions about which times certain events could have occurred.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_temporal_sequences" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml new file mode 100644 index 00000000..bec899a4 --- /dev/null +++ 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_five_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_five_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_five_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml new file mode 100644 index 00000000..0150b321 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_seven_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_seven_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_seven_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml new file mode 100644 index 00000000..9e08ead9 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/tracking_shuffled_objects_three_objects.yaml @@ -0,0 +1,4 @@ +"dataset_name": "tracking_shuffled_objects_three_objects" +"description": "A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.\n\n" +"include": "_zeroshot_template_yaml" +"task": 
"bbh_alt_pv_03_zeroshot_tracking_shuffled_objects_three_objects" diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml new file mode 100644 index 00000000..6b231142 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/web_of_lies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "web_of_lies" +"description": "Evaluate a random boolean function expressed as a word problem.\n\n" +"include": "_zeroshot_template_yaml" +"task": "bbh_alt_pv_03_zeroshot_web_of_lies" +"doc_to_target": target +"doc_to_choice": ["Yes", "No"] \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py new file mode 100644 index 00000000..279fa2fd --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/styles.py @@ -0,0 +1,64 @@ +import re +import string + +yes_no = ["Yes", "No"] + +def parse_choices(doc): + + input_text = doc["input"] + choice_string = input_text.split("Options:") + if len(choice_string) == 2: + choice_string = choice_string[-1] + if ("- Yes" in choice_string) and ("- No" in choice_string): + choices = yes_no + else: + choices = [ + c[4:].rstrip("\n") + for c in re.findall(r"\([A-Z]\) .*?\n|\([A-Z]\) .*?$", choice_string) + ] + return choices + else: + return [] + +def styles_01(doc): + #Check for choices and remove them + choices = parse_choices(doc) + if choices != []: + doc_to_text = doc["input"].split("Options:")[0] + if doc_to_text[-1] in ["\n", " "]: + doc_to_text = doc_to_text[:-1] + else: + doc_to_text = doc["input"] + return doc_to_text + +def styles_02(doc): + #Check for choices and remove them + doc_to_text = styles_01(doc) + return "Q: "+doc_to_text+"\nA:" + +def styles_03(doc): + #Check for choices and remove them + doc_to_text = styles_01(doc) + return "Question: 
"+doc_to_text+"\nAnswer:" + +def doc_to_choice(doc): + return parse_choices(doc) + +def doc_to_target(doc): + target = doc["target"] + try: + if target in ["Yes", "No"]: + return yes_no.index(target) + else: + return string.ascii_uppercase.index(target[1:-1]) + # else: + # return parse_choices(doc).index(target) + + except Exception as err: + print("Full Doc") + print(doc) + print("Choices") + print(parse_choices(doc)) + print("Error") + print(err) + import sys; sys.exit() \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py new file mode 100644 index 00000000..21fc6803 --- /dev/null +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/utils.py @@ -0,0 +1,25 @@ +# For fixing line 163 in `movie_recommendation` + +def fix_movie_recommendation(data): + + def _fix(doc): + if doc["target"] == "Monsters, Inc": + doc["input"] = "Find a movie similar to Minority Report, Shrek, Catch Me If You Can, Aladdin:\nOptions:\n(A) Monsters, Inc\n(B) Children of the Night\n(C) The Incredible Shrinking Man\n(D) Town & Country" + doc["target"] = "(A)" + return doc + + return data.map(_fix) + +def fix_ruin_names(data): + + def _fix(doc): + if doc["target"] == "dearth, wind, & fire": + doc["input"] = "Which of the following is a humorous edit of this artist or movie name: 'earth, wind, & fire'?\nOptions:\n(A) eareth, wind, & fire\n(B) earth, bind, & fire\n(C) earthm wind, & fire\n(D) dearth, wind, & fire" + doc["target"] = "(D)" + + elif doc["target"] == "rita, sue and bob poo": + doc["input"] = "Which of the following is a humorous edit of this artist or movie name: 'rita, sue and bob too'?\nOptions:\n(A) rita, sue and bob too\n(B) rita, sue and bob poo\n(C) rita, sue and box too\n(D) rita,y sue and bob too" + doc["target"] = "(B)" + return doc + + return data.map(_fix) \ No newline at end of file -- GitLab From af6e46fd7cc5f7ec5a7562e616dd0586f31ab82f Mon Sep 17 
00:00:00 2001 From: lintangsutawika Date: Fri, 8 Dec 2023 03:35:43 +0000 Subject: [PATCH 24/50] added prompt and output variations --- .../mathqa/alternative_worlds/mathqa_alt.yaml | 10 --------- .../{ => output_variation}/README.md | 2 -- .../{ => output_variation}/_mathqa_alt_yaml | 5 ----- .../output_variation/mathqa_alt.yaml | 10 +++++++++ .../{ => output_variation}/style_01/a.yaml | 6 ++--- .../{ => output_variation}/style_01/b.yaml | 6 ++--- .../{ => output_variation}/style_01/c.yaml | 6 ++--- .../{ => output_variation}/style_02/a.yaml | 6 ++--- .../{ => output_variation}/style_02/b.yaml | 6 ++--- .../{ => output_variation}/style_02/c.yaml | 6 ++--- .../{ => output_variation}/style_03/a.yaml | 6 ++--- .../{ => output_variation}/style_03/b.yaml | 6 ++--- .../{ => output_variation}/style_03/c.yaml | 6 ++--- .../{ => output_variation}/style_04/a.yaml | 6 ++--- .../{ => output_variation}/style_04/b.yaml | 6 ++--- .../{ => output_variation}/style_04/c.yaml | 6 ++--- .../{ => output_variation}/style_05/a.yaml | 6 ++--- .../{ => output_variation}/style_05/b.yaml | 6 ++--- .../{ => output_variation}/style_05/c.yaml | 6 ++--- .../{ => output_variation}/style_06/a.yaml | 6 ++--- .../{ => output_variation}/style_06/b.yaml | 6 ++--- .../{ => output_variation}/style_06/c.yaml | 6 ++--- .../{ => output_variation}/style_07/a.yaml | 6 ++--- .../{ => output_variation}/style_07/b.yaml | 6 ++--- .../{ => output_variation}/style_07/c.yaml | 6 ++--- .../{ => output_variation}/style_08/a.yaml | 6 ++--- .../{ => output_variation}/style_08/b.yaml | 6 ++--- .../{ => output_variation}/style_08/c.yaml | 6 ++--- .../{ => output_variation}/styles.py | 22 +++---------------- .../prompt_variation/style_01.yaml | 16 ++++++++++++++ .../prompt_variation/style_02.yaml | 16 ++++++++++++++ .../prompt_variation/style_03.yaml | 16 ++++++++++++++ 32 files changed, 109 insertions(+), 132 deletions(-) delete mode 100644 lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml rename 
lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/README.md (99%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/_mathqa_alt_yaml (66%) create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_01/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_01/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_01/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_02/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_02/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_02/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_03/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_03/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_03/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_04/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_04/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_04/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_05/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_05/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_05/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_06/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_06/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_06/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => 
output_variation}/style_07/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_07/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_07/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_08/a.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_08/b.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/style_08/c.yaml (63%) rename lm_eval/tasks/mathqa/alternative_worlds/{ => output_variation}/styles.py (97%) create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml b/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml deleted file mode 100644 index e2d308ab..00000000 --- a/lm_eval/tasks/mathqa/alternative_worlds/mathqa_alt.yaml +++ /dev/null @@ -1,10 +0,0 @@ -group: mathqa_alt -task: - - mathqa_01 - - mathqa_02 - - mathqa_03 - - mathqa_04 - - mathqa_05 - - mathqa_06 - - mathqa_07 - - mathqa_08 diff --git a/lm_eval/tasks/mathqa/alternative_worlds/README.md b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md similarity index 99% rename from lm_eval/tasks/mathqa/alternative_worlds/README.md rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md index 93600ae1..6f89dda2 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/README.md +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/README.md @@ -1,5 +1,4 @@ - Investigate affect of letter options - (A) - A) @@ -9,7 +8,6 @@ Investigate affect of letter options - a) - a. 
- a\t - Answer types: - letters only - original option diff --git a/lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml similarity index 66% rename from lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml index e13ecd93..cca78340 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/_mathqa_alt_yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/_mathqa_alt_yaml @@ -6,10 +6,5 @@ test_split: test doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" metric_list: - metric: acc - aggregation: mean - higher_is_better: true - metric: acc_norm - aggregation: mean - higher_is_better: true - metric: brier_score - higher_is_better: false diff --git a/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml new file mode 100644 index 00000000..d13679d1 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/mathqa_alt.yaml @@ -0,0 +1,10 @@ +group: mathqa_alt_ov +task: + - mathqa_alt_ov_01 + - mathqa_alt_ov_02 + - mathqa_alt_ov_03 + - mathqa_alt_ov_04 + - mathqa_alt_ov_05 + - mathqa_alt_ov_06 + - mathqa_alt_ov_07 + - mathqa_alt_ov_08 diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml index ba88e90f..ca38bfd7 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_01 -group_alias: style_01 -task: mathqa_01a -task_alias: a +group: mathqa_alt_ov_01 +task: 
mathqa_alt_ov_01a doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml index 879dc769..ed2eae14 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_01 -group_alias: style_01 -task: mathqa_01b -task_alias: b +group: mathqa_alt_ov_01 +task: mathqa_alt_ov_01b doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml index b05afe50..35a9d041 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_01/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_01/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_01 -group_alias: style_01 -task: mathqa_01c -task_alias: c +group: mathqa_alt_ov_01 +task: mathqa_alt_ov_01c doc_to_text: !function ../styles.template_01 doc_to_choice: !function ../styles.choice_01c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml index 87899d82..d6a259f2 100644 --- 
a/lm_eval/tasks/mathqa/alternative_worlds/style_02/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_02 -group_alias: style_02 -task: mathqa_02a -task_alias: a +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02a doc_to_text: !function ../styles.template_02 doc_to_choice: !function ../styles.choice_02a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml index 40dd7c76..31ec5c1b 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_02/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_02 -group_alias: style_02 -task: mathqa_02b -task_alias: b +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02b doc_to_text: !function ../styles.template_02 doc_to_choice: !function ../styles.choice_02b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml index ab981ac1..4a5e1dfa 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_02/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_02/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_02 -group_alias: style_02 -task: mathqa_02c -task_alias: c +group: mathqa_alt_ov_02 +task: mathqa_alt_ov_02c doc_to_text: !function ../styles.template_02 doc_to_choice: !function ../styles.choice_02c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml index c1732de6..22c1466c 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_03/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_03 -group_alias: style_03 -task: mathqa_03a -task_alias: a +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03a doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml index 31b5583b..83ae0a0e 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_03/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_03 -group_alias: style_03 -task: mathqa_03b -task_alias: b +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03b doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml index e57e3e2e..f9bb436f 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_03/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_03/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_03 -group_alias: 
style_03 -task: mathqa_03c -task_alias: c +group: mathqa_alt_ov_03 +task: mathqa_alt_ov_03c doc_to_text: !function ../styles.template_03 doc_to_choice: !function ../styles.choice_03c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml index 8123eae6..fa9f1ee7 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_04 -group_alias: style_04 -task: mathqa_04a -task_alias: a +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04a doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml index a76ee54c..b68fa2b4 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_04 -group_alias: style_04 -task: mathqa_04b -task_alias: b +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04b doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml 
index 2397b7e5..d7f8eb20 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_04/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_04/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_04 -group_alias: style_04 -task: mathqa_04c -task_alias: c +group: mathqa_alt_ov_04 +task: mathqa_alt_ov_04c doc_to_text: !function ../styles.template_04 doc_to_choice: !function ../styles.choice_04c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml index 0ab6b84a..8431baa5 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_05 -group_alias: style_05 -task: mathqa_05a -task_alias: a +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05a doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml index 02b48057..b13d6479 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_05 -group_alias: style_05 -task: mathqa_05b -task_alias: b +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05b doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml index b8ac931f..fb23a69d 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_05/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_05/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_05 -group_alias: style_05 -task: mathqa_05c -task_alias: c +group: mathqa_alt_ov_05 +task: mathqa_alt_ov_05c doc_to_text: !function ../styles.template_05 doc_to_choice: !function ../styles.choice_05c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml index 91980ebd..99e8c27a 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_06 -group_alias: style_06 -task: mathqa_06a -task_alias: a +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06a doc_to_text: !function ../styles.template_06 doc_to_choice: !function ../styles.choice_06a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml index 82e4ee90..77fd1d62 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_06 -group_alias: 
style_06 -task: mathqa_06b -task_alias: b +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06b doc_to_text: !function ../styles.template_06 doc_to_choice: !function ../styles.choice_06b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml index ef7a9b03..af60a24e 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_06/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_06/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_06 -group_alias: style_06 -task: mathqa_06c -task_alias: c +group: mathqa_alt_ov_06 +task: mathqa_alt_ov_06c doc_to_text: !function ../styles.template_06 doc_to_choice: !function ../styles.choice_06c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml index 430afd93..00a81155 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_07 -group_alias: style_07 -task: mathqa_07a -task_alias: a +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07a doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml 
index b86771f2..6f9e1b2a 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_07 -group_alias: style_07 -task: mathqa_07b -task_alias: b +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07b doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml index 40deb212..0b40d282 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_07/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_07/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_07 -group_alias: style_07 -task: mathqa_07c -task_alias: c +group: mathqa_alt_ov_07 +task: mathqa_alt_ov_07c doc_to_text: !function ../styles.template_07 doc_to_choice: !function ../styles.choice_07c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml index 5321598e..f2bb3788 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/a.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/a.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_08 -group_alias: style_08 -task: mathqa_08a -task_alias: a +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08a doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08a diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml 
b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml index 135d41e5..4b3b2a67 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/b.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/b.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_08 -group_alias: style_08 -task: mathqa_08b -task_alias: b +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08b doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08b diff --git a/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml similarity index 63% rename from lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml index c00a30e2..2242027d 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/style_08/c.yaml +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/style_08/c.yaml @@ -1,7 +1,5 @@ include: ../_mathqa_alt_yaml -group: mathqa_08 -group_alias: style_08 -task: mathqa_08c -task_alias: c +group: mathqa_alt_ov_08 +task: mathqa_alt_ov_08c doc_to_text: !function ../styles.template_08 doc_to_choice: !function ../styles.choice_08c diff --git a/lm_eval/tasks/mathqa/alternative_worlds/styles.py b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py similarity index 97% rename from lm_eval/tasks/mathqa/alternative_worlds/styles.py rename to lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py index 3b72cf05..8a88de87 100644 --- a/lm_eval/tasks/mathqa/alternative_worlds/styles.py +++ b/lm_eval/tasks/mathqa/alternative_worlds/output_variation/styles.py @@ -2,7 +2,6 @@ import re import string from functools import partial - def parse_choices(doc): choices = [ 
c[4:].rstrip(" ,") @@ -10,58 +9,43 @@ def parse_choices(doc): ] return choices - def doc_to_text_base(alphabet, style, doc): - choices = parse_choices(doc) - num = len(choices) - letter_list = [style.format(letter) for letter in alphabet[0:num]] - if "\t" in style: choice_string = "{}{}" else: choice_string = "{} {}" - doc_to_text = "\n\n".join( - [doc["Problem"]] + doc_to_text = "\n".join( + ["Question: " + doc["Problem"]] + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] ) - return doc_to_text - # Full continuation def choice_A(doc): return parse_choices(doc) - # Letters only def choice_B(alphabet, style, doc): - choices = parse_choices(doc) num = len(choices) - letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" in style: letter_list = [letter.replace("\t", "") for letter in letter_list] - return letter_list - # Letters + Full continuation def choice_C(alphabet, style, doc): - choices = parse_choices(doc) num = len(choices) - letter_list = [style.format(letter) for letter in alphabet[0:num]] if "\t" not in style: letter_list = [letter + " " for letter in letter_list] - return [letter + choice for letter, choice in zip(letter_list, choices)] - template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") choice_01a = choice_A choice_01b = partial(choice_B, string.ascii_lowercase, "({})") diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 00000000..0c91f8e3 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_01 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{Problem}}" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function 
../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "{{Problem}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..16183cc3 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_02 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Q: {{Problem}}\nA:" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function ../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "Q: {{Problem}}\nA:" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..c757e6d2 --- /dev/null +++ b/lm_eval/tasks/mathqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,16 @@ +group: mathqa_alt_pv +task: mathqa_alt_pv_03 +dataset_path: math_qa +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{Problem}}\nAnswer:" +doc_to_target: "{{['a', 'b', 'c', 'd', 'e'].index(correct)}}" +doc_to_choice: !function ../../utils.doc_to_choice +should_decontaminate: true +doc_to_decontamination_query: "Question: {{Problem}}\nAnswer:" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score -- GitLab From 1ff18c9ac4f8eee9b399dc3668602315aad8f05a Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 8 Dec 2023 03:48:01 +0000 Subject: [PATCH 25/50] added prompt and output variations --- 
.../output_variation/README.md | 20 +++++ .../output_variation/_sciq_alt_yaml | 23 +++++ .../output_variation/sciq_alt_ov.yaml | 10 +++ .../output_variation/style_01/a.yaml | 5 ++ .../output_variation/style_01/b.yaml | 5 ++ .../output_variation/style_01/c.yaml | 5 ++ .../output_variation/style_02/a.yaml | 5 ++ .../output_variation/style_02/b.yaml | 5 ++ .../output_variation/style_02/c.yaml | 5 ++ .../output_variation/style_03/a.yaml | 5 ++ .../output_variation/style_03/b.yaml | 5 ++ .../output_variation/style_03/c.yaml | 5 ++ .../output_variation/style_04/a.yaml | 5 ++ .../output_variation/style_04/b.yaml | 5 ++ .../output_variation/style_04/c.yaml | 5 ++ .../output_variation/style_05/a.yaml | 5 ++ .../output_variation/style_05/b.yaml | 5 ++ .../output_variation/style_05/c.yaml | 5 ++ .../output_variation/style_06/a.yaml | 5 ++ .../output_variation/style_06/b.yaml | 5 ++ .../output_variation/style_06/c.yaml | 5 ++ .../output_variation/style_07/a.yaml | 5 ++ .../output_variation/style_07/b.yaml | 5 ++ .../output_variation/style_07/c.yaml | 5 ++ .../output_variation/style_08/a.yaml | 5 ++ .../output_variation/style_08/b.yaml | 5 ++ .../output_variation/style_08/c.yaml | 5 ++ .../output_variation/styles.py | 86 +++++++++++++++++++ .../prompt_variation/style_01.yaml | 17 ++++ .../prompt_variation/style_02.yaml | 17 ++++ .../prompt_variation/style_03.yaml | 17 ++++ 31 files changed, 310 insertions(+) create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml create mode 100644 
lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py create mode 100644 
lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md b/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md new file mode 100644 index 00000000..a9f58e69 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. +- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml new file mode 100644 index 00000000..f4241098 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/_sciq_alt_yaml @@ -0,0 +1,23 @@ +group: + - ai2_arc +task: sciq +dataset_path: ai2_arc +dataset_name: ARC-Easy +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml new file mode 100644 index 00000000..6810b9db --- /dev/null +++
b/lm_eval/tasks/sciq/alternative_worlds/output_variation/sciq_alt_ov.yaml @@ -0,0 +1,10 @@ +group: sciq_alt_ov +task: + - sciq_alt_ov_01 + - sciq_alt_ov_02 + - sciq_alt_ov_03 + - sciq_alt_ov_04 + - sciq_alt_ov_05 + - sciq_alt_ov_06 + - sciq_alt_ov_07 + - sciq_alt_ov_08 diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 00000000..a5e9f654 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 00000000..ee3b6e84 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 00000000..4cda4a03 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_01 +task: sciq_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 00000000..02b5a793 --- 
/dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 00000000..da4105f2 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 00000000..9efeb696 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_02 +task: sciq_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 00000000..447ed5e3 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 
00000000..b28752c7 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 00000000..45a6d2fd --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_03 +task: sciq_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 00000000..185ca0a5 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 00000000..93fe6d5e --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml new 
file mode 100644 index 00000000..6a819085 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_04 +task: sciq_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 00000000..cf6c4db3 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 00000000..5760370b --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 00000000..0009ee0f --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_05 +task: sciq_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml 
b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 00000000..ba42c8e6 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 00000000..72d92270 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 00000000..b68b8745 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_06 +task: sciq_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 00000000..8276b942 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a \ No newline at end of file diff --git 
a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 00000000..89381cf7 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 00000000..eda510f1 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_07 +task: sciq_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 00000000..379a16be --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_08 +task: sciq_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a \ No newline at end of file diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 00000000..1cf016b1 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,5 @@ +include: ../_sciq_alt_yaml +group: sciq_alt_ov_08 +task: sciq_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b \ No newline at 
end of file
diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml
new file mode 100644
index 00000000..0c40dc8c
--- /dev/null
+++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/style_08/c.yaml
@@ -0,0 +1,5 @@
+include: ../_sciq_alt_yaml
+group: sciq_alt_ov_08
+task: sciq_alt_ov_08c
+doc_to_text: !function ../styles.template_08
+doc_to_choice: !function ../styles.choice_08c
\ No newline at end of file
diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py
new file mode 100644
index 00000000..c0cb5b12
--- /dev/null
+++ b/lm_eval/tasks/sciq/alternative_worlds/output_variation/styles.py
@@ -0,0 +1,122 @@
+"""Prompt and answer-choice builders for the SciQ output-variation tasks.
+
+The `template_XX` and `choice_XX{a,b,c}` names defined at the bottom are the
+entry points that the `style_XX/{a,b,c}.yaml` configs load via `!function`.
+"""
+import string
+from functools import partial
+
+
+def _get_choices(doc):
+    """Return the answer options of `doc` as a list of strings.
+
+    Two record layouts are accepted: a nested `choices.text` list, or the flat
+    `distractor1..3` / `correct_answer` columns that the sciq prompt_variation
+    configs read (correct answer last, i.e. index 3).
+    """
+    # NOTE(review): `_sciq_alt_yaml` sets `dataset_path: ai2_arc`, so only the
+    # nested layout is exercised today. Confirm whether that template should
+    # load `sciq` instead (it would then also need `doc_to_target: 3`).
+    if "choices" in doc:
+        return doc["choices"]["text"]
+    return [
+        doc["distractor1"],
+        doc["distractor2"],
+        doc["distractor3"],
+        doc["correct_answer"],
+    ]
+
+
+def _letter_labels(alphabet, style, num):
+    """Format the first `num` characters of `alphabet` with the `style` pattern."""
+    return [style.format(letter) for letter in alphabet[:num]]
+
+
+def doc_to_text_base(alphabet, style, doc):
+    """Build the prompt: question, one labelled option per line, then "Answer:".
+
+    Args:
+        alphabet: Characters to label the options with, consumed in order.
+        style: `str.format` pattern for one label, e.g. "({})" or "{}.".
+        doc: Dataset record holding "question" and the answer options.
+
+    Returns:
+        The prompt string, ending in "Answer:".
+    """
+    choices = _get_choices(doc)
+    letter_list = _letter_labels(alphabet, style, len(choices))
+
+    # A label style containing a tab brings its own separator; every other
+    # style gets one space between the label and the option text.
+    choice_string = "{}{}" if "\t" in style else "{} {}"
+
+    # "Answer:" comes after the options, matching the mathqa output_variation
+    # styles, so the scored continuation directly follows the answer cue.
+    return "\n".join(
+        ["Question: " + doc["question"]]
+        + [choice_string.format(i, j) for i, j in zip(letter_list, choices)]
+        + ["Answer:"]
+    )
+
+
+# Full continuation
+def choice_A(doc):
+    """Return the option texts themselves, without labels."""
+    return _get_choices(doc)
+
+
+# Letters only
+def choice_B(alphabet, style, doc):
+    """Return only the formatted labels, one per option."""
+    letter_list = _letter_labels(alphabet, style, len(_get_choices(doc)))
+    if "\t" in style:
+        # The tab separates label from text in the prompt; it is not part of
+        # the label the model is scored on.
+        letter_list = [letter.replace("\t", "") for letter in letter_list]
+    return letter_list
+
+
+# Letters + Full continuation
+def choice_C(alphabet, style, doc):
+    """Return each label joined to its option text, as rendered in the prompt."""
+    choices = _get_choices(doc)
+    letter_list = _letter_labels(alphabet, style, len(choices))
+    # Mirror doc_to_text_base: tab styles already end in their separator.
+    separator = "" if "\t" in style else " "
+    return [letter + separator + choice for letter, choice in zip(letter_list, choices)]
+
+
+# Styles 01-04 label with lowercase letters, 05-08 with uppercase; within each
+# group the label patterns are "({})", "{})", "{}." and "{}\t".
+template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})")
+choice_01a = choice_A
+choice_01b = partial(choice_B, string.ascii_lowercase, "({})")
+choice_01c = partial(choice_C, string.ascii_lowercase, "({})")
+template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})")
+choice_02a = choice_A
+choice_02b = partial(choice_B, string.ascii_lowercase, "{})")
+choice_02c = partial(choice_C, string.ascii_lowercase, "{})")
+template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.")
+choice_03a = choice_A
+choice_03b = partial(choice_B, string.ascii_lowercase, "{}.")
+choice_03c = partial(choice_C, string.ascii_lowercase, "{}.")
+template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t")
+choice_04a = choice_A
+choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t")
+choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t")
+template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})")
+choice_05a = choice_A
+choice_05b = partial(choice_B, string.ascii_uppercase, "({})")
+choice_05c = partial(choice_C, string.ascii_uppercase, "({})")
+template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})")
+choice_06a = choice_A
+choice_06b = partial(choice_B, string.ascii_uppercase, "{})")
+choice_06c = partial(choice_C, string.ascii_uppercase, "{})")
+template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.")
+choice_07a = choice_A
+choice_07b = partial(choice_B, string.ascii_uppercase, "{}.")
+choice_07c = partial(choice_C, string.ascii_uppercase, "{}.")
+template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t")
+choice_08a = choice_A
+choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t")
+choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t")
diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml
new file mode 100644
index
00000000..c653b2d9 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_01 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\n{{question}}" +doc_to_target: 3 +doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..a9498269 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_02 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\nQ: {{question}}\nA:" +doc_to_target: 3 +doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..12049228 --- /dev/null +++ b/lm_eval/tasks/sciq/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,17 @@ +group: sciq_alt_pv +task: sciq_alt_pv_03 +dataset_path: sciq +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:" +doc_to_target: 3 
+doc_to_choice: "{{[distractor1, distractor2, distractor3, correct_answer]}}" +should_decontaminate: true +doc_to_decontamination_query: "{{support}} {{question}}" +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score -- GitLab From 1653632c45a5bb33d9fdd9d92520cdc4ae5ed0af Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 8 Dec 2023 03:48:38 +0000 Subject: [PATCH 26/50] remove README.md --- .../output_variation/README.md | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md diff --git a/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md b/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md deleted file mode 100644 index a9f58e69..00000000 --- a/lm_eval/tasks/sciq/alternative_worlds/output_variation/README.md +++ /dev/null @@ -1,20 +0,0 @@ - - -Investigate affect of letter options -- (A) -- A) -- A. -- A\t -- (a) -- a) -- a. -- a\t - -Answer types: -- letters only - - original option - - just letter -- letters + continuation - - original option - - just letter -- continuation \ No newline at end of file -- GitLab From ec4cdfed88f50f34ce01c1f4e5936c1f99c5a51a Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:15:56 +0000 Subject: [PATCH 27/50] fixed file path --- .../style_02/zeroshot/movie_recommendation.yaml | 2 +- .../prompt_variation/style_02/zeroshot/ruin_names.yaml | 2 +- .../style_03/zeroshot/movie_recommendation.yaml | 2 +- .../prompt_variation/style_03/zeroshot/ruin_names.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml index ba41418e..81615393 100644 --- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml +++ 
b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/movie_recommendation.yaml @@ -2,4 +2,4 @@ "description": "Recommend movies similar to the given list of movies.\n\n" "include": "_zeroshot_template_yaml" "task": "bbh_alt_pv_02_zeroshot_movie_recommendation" -"process_docs": !function ../utils.fix_movie_recommendation \ No newline at end of file +"process_docs": !function ../../utils.fix_movie_recommendation \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml index aa1eb14b..dd20afdb 100644 --- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_02/zeroshot/ruin_names.yaml @@ -2,4 +2,4 @@ "description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" "include": "_zeroshot_template_yaml" "task": "bbh_alt_pv_02_zeroshot_ruin_names" -"process_docs": !function utils.fix_ruin_names \ No newline at end of file +"process_docs": !function ../../utils.fix_ruin_names \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml index b7603fa7..470e167d 100644 --- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/movie_recommendation.yaml @@ -2,4 +2,4 @@ "description": "Recommend movies similar to the given list of movies.\n\n" "include": "_zeroshot_template_yaml" "task": "bbh_alt_pv_03_zeroshot_movie_recommendation" -"process_docs": !function ../utils.fix_movie_recommendation \ No newline at end of file +"process_docs": !function 
../../utils.fix_movie_recommendation \ No newline at end of file diff --git a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml index c4d741f3..67750b2b 100644 --- a/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml +++ b/lm_eval/tasks/bbh/alternative_worlds/prompt_variation/style_03/zeroshot/ruin_names.yaml @@ -2,4 +2,4 @@ "description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n" "include": "_zeroshot_template_yaml" "task": "bbh_alt_pv_03_zeroshot_ruin_names" -"process_docs": !function utils.fix_ruin_names \ No newline at end of file +"process_docs": !function ../../utils.fix_ruin_names \ No newline at end of file -- GitLab From 09cac5dc1171c9c9503f7bda698394a5581f3828 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:16:34 +0000 Subject: [PATCH 28/50] remove style --- .../alternative_worlds/style_01/a/_template_yaml | 15 --------------- .../style_01/a/style_01_abstract_algebra.yaml | 4 ---- .../style_01/a/style_01_anatomy.yaml | 4 ---- .../style_01/a/style_01_astronomy.yaml | 4 ---- .../style_01/a/style_01_business_ethics.yaml | 4 ---- .../style_01/a/style_01_clinical_knowledge.yaml | 4 ---- .../style_01/a/style_01_college_biology.yaml | 4 ---- .../style_01/a/style_01_college_chemistry.yaml | 4 ---- .../a/style_01_college_computer_science.yaml | 4 ---- .../style_01/a/style_01_college_mathematics.yaml | 4 ---- .../style_01/a/style_01_college_medicine.yaml | 4 ---- .../style_01/a/style_01_college_physics.yaml | 4 ---- .../style_01/a/style_01_computer_security.yaml | 4 ---- .../style_01/a/style_01_conceptual_physics.yaml | 4 ---- .../style_01/a/style_01_econometrics.yaml | 4 ---- .../a/style_01_electrical_engineering.yaml | 4 ---- .../a/style_01_elementary_mathematics.yaml | 4 ---- .../style_01/a/style_01_formal_logic.yaml | 4 ---- 
.../style_01/a/style_01_global_facts.yaml | 4 ---- .../style_01/a/style_01_high_school_biology.yaml | 4 ---- .../a/style_01_high_school_chemistry.yaml | 4 ---- .../a/style_01_high_school_computer_science.yaml | 4 ---- .../a/style_01_high_school_european_history.yaml | 4 ---- .../a/style_01_high_school_geography.yaml | 4 ---- ...le_01_high_school_government_and_politics.yaml | 4 ---- .../a/style_01_high_school_macroeconomics.yaml | 4 ---- .../a/style_01_high_school_mathematics.yaml | 4 ---- .../a/style_01_high_school_microeconomics.yaml | 4 ---- .../style_01/a/style_01_high_school_physics.yaml | 4 ---- .../a/style_01_high_school_psychology.yaml | 4 ---- .../a/style_01_high_school_statistics.yaml | 4 ---- .../a/style_01_high_school_us_history.yaml | 4 ---- .../a/style_01_high_school_world_history.yaml | 4 ---- .../style_01/a/style_01_human_aging.yaml | 4 ---- .../style_01/a/style_01_human_sexuality.yaml | 4 ---- .../style_01/a/style_01_international_law.yaml | 4 ---- .../style_01/a/style_01_jurisprudence.yaml | 4 ---- .../style_01/a/style_01_logical_fallacies.yaml | 4 ---- .../style_01/a/style_01_machine_learning.yaml | 4 ---- .../style_01/a/style_01_management.yaml | 4 ---- .../style_01/a/style_01_marketing.yaml | 4 ---- .../style_01/a/style_01_medical_genetics.yaml | 4 ---- .../style_01/a/style_01_miscellaneous.yaml | 4 ---- .../style_01/a/style_01_moral_disputes.yaml | 4 ---- .../style_01/a/style_01_moral_scenarios.yaml | 4 ---- .../style_01/a/style_01_nutrition.yaml | 4 ---- .../style_01/a/style_01_philosophy.yaml | 4 ---- .../style_01/a/style_01_prehistory.yaml | 4 ---- .../a/style_01_professional_accounting.yaml | 4 ---- .../style_01/a/style_01_professional_law.yaml | 4 ---- .../a/style_01_professional_medicine.yaml | 4 ---- .../a/style_01_professional_psychology.yaml | 4 ---- .../style_01/a/style_01_public_relations.yaml | 4 ---- .../style_01/a/style_01_security_studies.yaml | 4 ---- .../style_01/a/style_01_sociology.yaml | 4 ---- 
.../style_01/a/style_01_us_foreign_policy.yaml | 4 ---- .../style_01/a/style_01_virology.yaml | 4 ---- .../style_01/a/style_01_world_religions.yaml | 4 ---- .../alternative_worlds/style_01/b/_template_yaml | 15 --------------- .../style_01/b/style_01_abstract_algebra.yaml | 4 ---- .../style_01/b/style_01_anatomy.yaml | 4 ---- .../style_01/b/style_01_astronomy.yaml | 4 ---- .../style_01/b/style_01_business_ethics.yaml | 4 ---- .../style_01/b/style_01_clinical_knowledge.yaml | 4 ---- .../style_01/b/style_01_college_biology.yaml | 4 ---- .../style_01/b/style_01_college_chemistry.yaml | 4 ---- .../b/style_01_college_computer_science.yaml | 4 ---- .../style_01/b/style_01_college_mathematics.yaml | 4 ---- .../style_01/b/style_01_college_medicine.yaml | 4 ---- .../style_01/b/style_01_college_physics.yaml | 4 ---- .../style_01/b/style_01_computer_security.yaml | 4 ---- .../style_01/b/style_01_conceptual_physics.yaml | 4 ---- .../style_01/b/style_01_econometrics.yaml | 4 ---- .../b/style_01_electrical_engineering.yaml | 4 ---- .../b/style_01_elementary_mathematics.yaml | 4 ---- .../style_01/b/style_01_formal_logic.yaml | 4 ---- .../style_01/b/style_01_global_facts.yaml | 4 ---- .../style_01/b/style_01_high_school_biology.yaml | 4 ---- .../b/style_01_high_school_chemistry.yaml | 4 ---- .../b/style_01_high_school_computer_science.yaml | 4 ---- .../b/style_01_high_school_european_history.yaml | 4 ---- .../b/style_01_high_school_geography.yaml | 4 ---- ...le_01_high_school_government_and_politics.yaml | 4 ---- .../b/style_01_high_school_macroeconomics.yaml | 4 ---- .../b/style_01_high_school_mathematics.yaml | 4 ---- .../b/style_01_high_school_microeconomics.yaml | 4 ---- .../style_01/b/style_01_high_school_physics.yaml | 4 ---- .../b/style_01_high_school_psychology.yaml | 4 ---- .../b/style_01_high_school_statistics.yaml | 4 ---- .../b/style_01_high_school_us_history.yaml | 4 ---- .../b/style_01_high_school_world_history.yaml | 4 ---- .../style_01/b/style_01_human_aging.yaml | 
4 ---- .../style_01/b/style_01_human_sexuality.yaml | 4 ---- .../style_01/b/style_01_international_law.yaml | 4 ---- .../style_01/b/style_01_jurisprudence.yaml | 4 ---- .../style_01/b/style_01_logical_fallacies.yaml | 4 ---- .../style_01/b/style_01_machine_learning.yaml | 4 ---- .../style_01/b/style_01_management.yaml | 4 ---- .../style_01/b/style_01_marketing.yaml | 4 ---- .../style_01/b/style_01_medical_genetics.yaml | 4 ---- .../style_01/b/style_01_miscellaneous.yaml | 4 ---- .../style_01/b/style_01_moral_disputes.yaml | 4 ---- .../style_01/b/style_01_moral_scenarios.yaml | 4 ---- .../style_01/b/style_01_nutrition.yaml | 4 ---- .../style_01/b/style_01_philosophy.yaml | 4 ---- .../style_01/b/style_01_prehistory.yaml | 4 ---- .../b/style_01_professional_accounting.yaml | 4 ---- .../style_01/b/style_01_professional_law.yaml | 4 ---- .../b/style_01_professional_medicine.yaml | 4 ---- .../b/style_01_professional_psychology.yaml | 4 ---- .../style_01/b/style_01_public_relations.yaml | 4 ---- .../style_01/b/style_01_security_studies.yaml | 4 ---- .../style_01/b/style_01_sociology.yaml | 4 ---- .../style_01/b/style_01_us_foreign_policy.yaml | 4 ---- .../style_01/b/style_01_virology.yaml | 4 ---- .../style_01/b/style_01_world_religions.yaml | 4 ---- .../alternative_worlds/style_01/c/_template_yaml | 15 --------------- .../style_01/c/style_01_abstract_algebra.yaml | 4 ---- .../style_01/c/style_01_anatomy.yaml | 4 ---- .../style_01/c/style_01_astronomy.yaml | 4 ---- .../style_01/c/style_01_business_ethics.yaml | 4 ---- .../style_01/c/style_01_clinical_knowledge.yaml | 4 ---- .../style_01/c/style_01_college_biology.yaml | 4 ---- .../style_01/c/style_01_college_chemistry.yaml | 4 ---- .../c/style_01_college_computer_science.yaml | 4 ---- .../style_01/c/style_01_college_mathematics.yaml | 4 ---- .../style_01/c/style_01_college_medicine.yaml | 4 ---- .../style_01/c/style_01_college_physics.yaml | 4 ---- .../style_01/c/style_01_computer_security.yaml | 4 ---- 
.../style_01/c/style_01_conceptual_physics.yaml | 4 ---- .../style_01/c/style_01_econometrics.yaml | 4 ---- .../c/style_01_electrical_engineering.yaml | 4 ---- .../c/style_01_elementary_mathematics.yaml | 4 ---- .../style_01/c/style_01_formal_logic.yaml | 4 ---- .../style_01/c/style_01_global_facts.yaml | 4 ---- .../style_01/c/style_01_high_school_biology.yaml | 4 ---- .../c/style_01_high_school_chemistry.yaml | 4 ---- .../c/style_01_high_school_computer_science.yaml | 4 ---- .../c/style_01_high_school_european_history.yaml | 4 ---- .../c/style_01_high_school_geography.yaml | 4 ---- ...le_01_high_school_government_and_politics.yaml | 4 ---- .../c/style_01_high_school_macroeconomics.yaml | 4 ---- .../c/style_01_high_school_mathematics.yaml | 4 ---- .../c/style_01_high_school_microeconomics.yaml | 4 ---- .../style_01/c/style_01_high_school_physics.yaml | 4 ---- .../c/style_01_high_school_psychology.yaml | 4 ---- .../c/style_01_high_school_statistics.yaml | 4 ---- .../c/style_01_high_school_us_history.yaml | 4 ---- .../c/style_01_high_school_world_history.yaml | 4 ---- .../style_01/c/style_01_human_aging.yaml | 4 ---- .../style_01/c/style_01_human_sexuality.yaml | 4 ---- .../style_01/c/style_01_international_law.yaml | 4 ---- .../style_01/c/style_01_jurisprudence.yaml | 4 ---- .../style_01/c/style_01_logical_fallacies.yaml | 4 ---- .../style_01/c/style_01_machine_learning.yaml | 4 ---- .../style_01/c/style_01_management.yaml | 4 ---- .../style_01/c/style_01_marketing.yaml | 4 ---- .../style_01/c/style_01_medical_genetics.yaml | 4 ---- .../style_01/c/style_01_miscellaneous.yaml | 4 ---- .../style_01/c/style_01_moral_disputes.yaml | 4 ---- .../style_01/c/style_01_moral_scenarios.yaml | 4 ---- .../style_01/c/style_01_nutrition.yaml | 4 ---- .../style_01/c/style_01_philosophy.yaml | 4 ---- .../style_01/c/style_01_prehistory.yaml | 4 ---- .../c/style_01_professional_accounting.yaml | 4 ---- .../style_01/c/style_01_professional_law.yaml | 4 ---- 
.../c/style_01_professional_medicine.yaml | 4 ---- .../c/style_01_professional_psychology.yaml | 4 ---- .../style_01/c/style_01_public_relations.yaml | 4 ---- .../style_01/c/style_01_security_studies.yaml | 4 ---- .../style_01/c/style_01_sociology.yaml | 4 ---- .../style_01/c/style_01_us_foreign_policy.yaml | 4 ---- .../style_01/c/style_01_virology.yaml | 4 ---- .../style_01/c/style_01_world_religions.yaml | 4 ---- 174 files changed, 729 deletions(-) delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml delete mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml deleted file mode 100644 index 9f72c186..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/_template_yaml +++ /dev/null @@ -1,15 +0,0 @@ -group: mmlu_style_01 -group_alias: style_01 -task: mmlu_style_01a -task_alias: a -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: !function ../../styles.template_01 -doc_to_choice: !function ../../styles.choice_01a -doc_to_target: answer -metric_list: - - metric: acc - - metric: acc_norm - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml deleted file mode 100644 index adbde88f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml deleted file mode 100644 index d0ac8cbf..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml deleted file mode 100644 index c4d6fc38..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml deleted file mode 100644 index 41139a48..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml deleted file mode 100644 index 0741143c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml deleted file mode 100644 index 7e95f6a8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about 
college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml deleted file mode 100644 index 18bf8054..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml deleted file mode 100644 index 93250def..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml deleted file mode 100644 index a1c2c7be..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml deleted file mode 100644 index ff64eb39..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml deleted file mode 100644 index f4dcdf2f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml deleted file mode 100644 index d84981a6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml deleted file mode 100644 index bb859070..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple 
choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml deleted file mode 100644 index abfef42f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml deleted file mode 100644 index 1e20ea86..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml deleted file mode 100644 index 4e139219..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml deleted file mode 100644 index 5e8aab5d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml deleted file mode 100644 index 7d0aaee0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml deleted file mode 100644 index 2cf24f96..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml deleted file mode 100644 index 32700f8e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice 
questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml deleted file mode 100644 index b2463f14..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml deleted file mode 100644 index 62fe240d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml deleted file mode 100644 index ea07f428..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml deleted file mode 100644 index 7dc6ba9f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml deleted file mode 100644 index b8c2f90b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml deleted file mode 100644 index 02d4aa28..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml deleted file mode 100644 index 3c64337f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml deleted file mode 100644 index 8dd5fc59..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml deleted file mode 100644 index 650cd3de..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml deleted file mode 100644 index 2bd87285..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml deleted file mode 100644 index 287e5479..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml deleted file mode 100644 index a2d8b18a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml deleted file mode 100644 index 6226957b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human 
aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml deleted file mode 100644 index e8acd6f1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml deleted file mode 100644 index e1336f1e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml deleted file mode 100644 index a2f5f14c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml deleted file mode 100644 index 51d83783..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml deleted file mode 100644 index 5912210e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml deleted file mode 100644 index 6e8ed63f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml deleted file mode 100644 index 0e437a32..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_marketing" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml deleted file mode 100644 index 00734846..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml deleted file mode 100644 index 3dcf6d92..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml deleted file mode 100644 index a9fe3cfd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml deleted file mode 100644 index 7be4e0e2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" 
-"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml deleted file mode 100644 index 462b97fa..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml deleted file mode 100644 index 60de6896..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml deleted file mode 100644 index 4cd9185e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml deleted file mode 100644 index d44ae986..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml deleted file mode 100644 index 0c6252b6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml deleted file mode 100644 index dbe8ce5d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml deleted file mode 100644 index b1508047..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional 
psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml deleted file mode 100644 index 6b5bd12c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml deleted file mode 100644 index 8214bf4f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml deleted file mode 100644 index 5eecad45..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml deleted file mode 100644 index fbbdcdb0..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml deleted file mode 100644 index 97d76e17..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml deleted file mode 100644 index 394792f4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/a/style_01_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml deleted file mode 100644 index f3cab9d6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/_template_yaml +++ /dev/null @@ -1,15 +0,0 @@ -group: mmlu_style_01 -group_alias: style_01 -task: mmlu_style_01b -task_alias: b -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: !function ../../styles.template_01 -doc_to_choice: !function ../../styles.choice_01b -doc_to_target: answer -metric_list: - - metric: 
acc - - metric: acc_norm - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml deleted file mode 100644 index fb1593c2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml deleted file mode 100644 index 095e2962..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml deleted file mode 100644 index 6c8300d9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml deleted file mode 100644 index d18a16ee..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" 
-"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml deleted file mode 100644 index 7d7ff4eb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml deleted file mode 100644 index 63e56071..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml deleted file mode 100644 index 165f1109..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml deleted file mode 100644 index f9be6bc0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml deleted file mode 100644 index f424c5b4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml deleted file mode 100644 index 99dd5539..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml deleted file mode 100644 index f86c0510..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" 
-"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml deleted file mode 100644 index 4bb06a83..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml deleted file mode 100644 index 8d28fd03..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml deleted file mode 100644 index a5e5039f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml deleted file mode 100644 index 99b66fd1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml deleted file mode 100644 index abbc36ba..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml deleted file mode 100644 index 1c062948..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml deleted file mode 100644 index 9ae058c8..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The 
following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml deleted file mode 100644 index 5c3557c0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml deleted file mode 100644 index cab37323..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml deleted file mode 100644 index 4cabb368..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_computer_science" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml deleted file mode 100644 index c84b7abd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml deleted file mode 100644 index 740c8de5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" -"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml deleted file mode 100644 index 9ec4827a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml deleted file mode 100644 index eb4c62a6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml deleted file mode 100644 index eff1253f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml deleted file mode 100644 index a27ba3e7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml deleted file mode 100644 index 07ddfae7..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml deleted file mode 100644 index 0b602c8f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml deleted file mode 100644 index 352b036a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml deleted file mode 100644 index 55f7521c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about 
high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml deleted file mode 100644 index 5f525bd1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml deleted file mode 100644 index f6d0d789..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_aging.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml deleted file mode 100644 index fba8f734..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml deleted file mode 100644 index 
c3bb9083..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml deleted file mode 100644 index 2bd61c14..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml deleted file mode 100644 index 4aa1173e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml deleted file mode 100644 index 7e7e244a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": 
"mmlu_style_01b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml deleted file mode 100644 index 76c5df3d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml deleted file mode 100644 index 0d5e4a76..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml deleted file mode 100644 index 6de75a5b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml deleted file mode 100644 index c0f829fc..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": 
"The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml deleted file mode 100644 index 41d0bff3..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml deleted file mode 100644 index d87aa1c2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_moral_scenarios.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml deleted file mode 100644 index 3a8e8af7..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml deleted file mode 100644 index 0d2a03e8..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml deleted file mode 100644 index 58e73666..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml deleted file mode 100644 index 5e26a638..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml deleted file mode 100644 index 672af7d2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_professional_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml deleted file mode 100644 index f9f5622c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml deleted file mode 100644 index 7a866885..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml deleted file mode 100644 index ba58dd4f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml deleted file mode 100644 index 9dd237f1..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml deleted file mode 100644 index 105075a5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml deleted file mode 100644 index fff71529..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml deleted file mode 100644 index 778b5d3d..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml deleted file mode 100644 index 81c7f5f4..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/b/style_01_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml deleted file mode 100644 index ab765e8b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/_template_yaml +++ /dev/null @@ -1,15 +0,0 @@ -group: mmlu_style_01 -group_alias: style_01 -task: mmlu_style_01c -task_alias: c -dataset_path: cais/mmlu -test_split: test -fewshot_split: dev -output_type: multiple_choice -doc_to_text: !function ../../styles.template_01 -doc_to_choice: !function ../../styles.choice_01c -doc_to_target: answer -metric_list: - - metric: acc - - metric: acc_norm - - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml deleted file mode 100644 index 1d344a0e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_abstract_algebra.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "abstract_algebra" -"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml deleted file mode 100644 index b4bb37b1..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_anatomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ 
-"dataset_name": "anatomy" -"description": "The following are multiple choice questions (with answers) about anatomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml deleted file mode 100644 index 0b63fd38..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_astronomy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "astronomy" -"description": "The following are multiple choice questions (with answers) about astronomy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml deleted file mode 100644 index 0ef5de0f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_business_ethics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "business_ethics" -"description": "The following are multiple choice questions (with answers) about business ethics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml deleted file mode 100644 index 4d8bcb85..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_clinical_knowledge.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "clinical_knowledge" -"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml deleted file 
mode 100644 index 68ca5d9b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_biology" -"description": "The following are multiple choice questions (with answers) about college biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml deleted file mode 100644 index a15634ae..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_chemistry" -"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml deleted file mode 100644 index 5a176107..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_computer_science" -"description": "The following are multiple choice questions (with answers) about college computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml deleted file mode 100644 index 1400ca3a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_mathematics" -"description": "The following are multiple choice questions (with answers) about 
college mathematics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml deleted file mode 100644 index 63931106..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_medicine" -"description": "The following are multiple choice questions (with answers) about college medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml deleted file mode 100644 index 2a8f60ff..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_college_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "college_physics" -"description": "The following are multiple choice questions (with answers) about college physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml deleted file mode 100644 index 4e1b6733..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_computer_security.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "computer_security" -"description": "The following are multiple choice questions (with answers) about computer security.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml deleted file mode 100644 index b6bb8329..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_conceptual_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "conceptual_physics" -"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml deleted file mode 100644 index 93d0af1c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_econometrics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are multiple choice questions (with answers) about econometrics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml deleted file mode 100644 index 02ada01a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_electrical_engineering.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "electrical_engineering" -"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml deleted file mode 100644 index 8b6c4377..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_elementary_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "elementary_mathematics" -"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n" -"include": "_template_yaml" 
-"task": "mmlu_style_01c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml deleted file mode 100644 index 33195cfa..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_formal_logic.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are multiple choice questions (with answers) about formal logic.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml deleted file mode 100644 index d2e391b6..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_global_facts.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "global_facts" -"description": "The following are multiple choice questions (with answers) about global facts.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml deleted file mode 100644 index f776e3af..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_biology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_biology" -"description": "The following are multiple choice questions (with answers) about high school biology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml deleted file mode 100644 index a0abfc1b..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_chemistry.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_chemistry" -"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml deleted file mode 100644 index b6730455..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_computer_science.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_computer_science" -"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml deleted file mode 100644 index 8a0c0450..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_european_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_european_history" -"description": "The following are multiple choice questions (with answers) about high school european history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml deleted file mode 100644 index 551cbd0b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_geography.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_geography" 
-"description": "The following are multiple choice questions (with answers) about high school geography.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml deleted file mode 100644 index 007b67a9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_government_and_politics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_government_and_politics" -"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml deleted file mode 100644 index 339f090a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_macroeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_macroeconomics" -"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml deleted file mode 100644 index 298c23df..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_mathematics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_mathematics" -"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n" -"include": 
"_template_yaml" -"task": "mmlu_style_01c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml deleted file mode 100644 index de821453..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_microeconomics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_microeconomics" -"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml deleted file mode 100644 index 8c38cf29..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_physics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_physics" -"description": "The following are multiple choice questions (with answers) about high school physics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml deleted file mode 100644 index a2c3f8c5..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_psychology" -"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml deleted file mode 100644 index 1e2d0ce9..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_statistics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_statistics" -"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml deleted file mode 100644 index 5b6a3639..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_us_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_us_history" -"description": "The following are multiple choice questions (with answers) about high school us history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml deleted file mode 100644 index 807e01d2..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_high_school_world_history.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "high_school_world_history" -"description": "The following are multiple choice questions (with answers) about high school world history.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml deleted file mode 100644 index 4a05e4ec..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_aging.yaml +++ /dev/null 
@@ -1,4 +0,0 @@ -"dataset_name": "human_aging" -"description": "The following are multiple choice questions (with answers) about human aging.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml deleted file mode 100644 index 7d4f87dd..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_human_sexuality.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml deleted file mode 100644 index 4c87ca95..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_international_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "international_law" -"description": "The following are multiple choice questions (with answers) about international law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml deleted file mode 100644 index f893df3b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_jurisprudence.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml deleted file mode 100644 index d845ed1c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_logical_fallacies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "logical_fallacies" -"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml deleted file mode 100644 index de7bae42..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_machine_learning.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "machine_learning" -"description": "The following are multiple choice questions (with answers) about machine learning.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml deleted file mode 100644 index 693b6efe..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_management.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "management" -"description": "The following are multiple choice questions (with answers) about management.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml deleted file mode 100644 index 6d999b72..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_marketing.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are multiple choice questions (with answers) about marketing.\n\n" -"include": 
"_template_yaml" -"task": "mmlu_style_01c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml deleted file mode 100644 index 0693416e..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_medical_genetics.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "medical_genetics" -"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml deleted file mode 100644 index 49b669ac..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_miscellaneous.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "miscellaneous" -"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml deleted file mode 100644 index bfc9e4e0..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_disputes.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_disputes" -"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml deleted file mode 100644 index f8dd7efb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_moral_scenarios.yaml +++ 
/dev/null @@ -1,4 +0,0 @@ -"dataset_name": "moral_scenarios" -"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml deleted file mode 100644 index 703db244..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_nutrition.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are multiple choice questions (with answers) about nutrition.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml deleted file mode 100644 index cbf41b70..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_philosophy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are multiple choice questions (with answers) about philosophy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml deleted file mode 100644 index c109226c..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_prehistory.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are multiple choice questions (with answers) about prehistory.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml deleted file mode 100644 
index 9ef8738b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_accounting.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_accounting" -"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml deleted file mode 100644 index f748683f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_law.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_law" -"description": "The following are multiple choice questions (with answers) about professional law.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml deleted file mode 100644 index 077c4575..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_medicine.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_medicine" -"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml deleted file mode 100644 index 8bcbac84..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_professional_psychology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "professional_psychology" -"description": "The following are multiple choice questions (with 
answers) about professional psychology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml deleted file mode 100644 index 5509e50a..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_public_relations.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are multiple choice questions (with answers) about public relations.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml deleted file mode 100644 index f8a0cf58..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_security_studies.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are multiple choice questions (with answers) about security studies.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml deleted file mode 100644 index 9ede980b..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_sociology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are multiple choice questions (with answers) about sociology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml deleted file mode 100644 index 51e28c67..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_us_foreign_policy.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml deleted file mode 100644 index 7c0d1acb..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_virology.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "virology" -"description": "The following are multiple choice questions (with answers) about virology.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml deleted file mode 100644 index 706aa92f..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/style_01/c/style_01_world_religions.yaml +++ /dev/null @@ -1,4 +0,0 @@ -"dataset_name": "world_religions" -"description": "The following are multiple choice questions (with answers) about world religions.\n\n" -"include": "_template_yaml" -"task": "mmlu_style_01c_world_religions" -- GitLab From 6e220b1735d316bcbfa9211cf6bf59fabd6ed5ce Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:17:00 +0000 Subject: [PATCH 29/50] removed --- .../alternative_worlds/mmlu_alternative_worlds_fc.yaml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml deleted file mode 100644 index faca4f74..00000000 --- 
a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_fc.yaml +++ /dev/null @@ -1,7 +0,0 @@ -group: alternative_worlds_mmlu_fc -task: - - mmlu_style_01_fc - - mmlu_style_02_fc - - mmlu_style_03_fc - - mmlu_style_04_fc - - mmlu_style_05_fc -- GitLab From edb21d21d85c519254cbe01478ff57feb1cd5aea Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:17:11 +0000 Subject: [PATCH 30/50] removed --- .../alternative_worlds/mmlu_alternative_worlds_lo.yaml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml deleted file mode 100644 index 2e357184..00000000 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_alternative_worlds_lo.yaml +++ /dev/null @@ -1,7 +0,0 @@ -group: alternative_worlds_mmlu_lo -task: - - mmlu_style_01_lo - - mmlu_style_02_lo - - mmlu_style_03_lo - - mmlu_style_04_lo - - mmlu_style_05_lo -- GitLab From b9e553122a32498c3cfc673c2166195b96c86cd4 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:18:12 +0000 Subject: [PATCH 31/50] add alt prompts for truthfulqa --- .../style_01/truthfulqa_mc1.yaml | 17 +++++++++++++++++ .../style_01/truthfulqa_mc2.yaml | 11 +++++++++++ .../style_02/truthfulqa_mc1.yaml | 17 +++++++++++++++++ .../style_02/truthfulqa_mc2.yaml | 11 +++++++++++ .../style_03/truthfulqa_mc1.yaml | 17 +++++++++++++++++ .../style_03/truthfulqa_mc2.yaml | 11 +++++++++++ 6 files changed, 84 insertions(+) create mode 100644 lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml create mode 100644 lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml create mode 100644 lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml create mode 100644 
lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml create mode 100644 lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml create mode 100644 lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml new file mode 100644 index 00000000..2e6f00bb --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_01 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "{{question}}" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml new file mode 100644 index 00000000..93b27828 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_01/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_01 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml 
b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml new file mode 100644 index 00000000..5f78ceeb --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_02 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0 +doc_to_text: "Q: {{question}}\nA:" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml new file mode 100644 index 00000000..fe4535bc --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_02/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_02 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml new file mode 100644 index 00000000..e6417d98 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc1.yaml @@ -0,0 +1,17 @@ +task: truthfulqa_mc1_alt_pv_03 +dataset_path: truthful_qa +dataset_name: multiple_choice +output_type: multiple_choice +training_split: null +validation_split: validation +test_split: null +num_fewshot: 0
+doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: 0 +doc_to_choice: "{{mc1_targets.choices}}" +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true diff --git a/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml new file mode 100644 index 00000000..1862aa83 --- /dev/null +++ b/lm_eval/tasks/truthfulqa/alternative_worlds/prompt_variation/style_03/truthfulqa_mc2.yaml @@ -0,0 +1,11 @@ +include: truthfulqa_mc1.yaml +task: truthfulqa_mc2_alt_pv_03 +doc_to_target: 0 +doc_to_choice: "{{mc2_targets.choices}}" +process_results: !function ../../../utils.process_results_mc2 +should_decontaminate: True +doc_to_decontamination_query: question +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true -- GitLab From 0d5748b7ca3e0c5ecf8ea9e0d9bb709e64a79446 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:18:39 +0000 Subject: [PATCH 32/50] added mmlu alt world prompts --- .../mmlu_output_variation.yaml | 26 +++++++++++++++++++ .../mmlu_prompt_variation.yaml | 18 +++++++++++++ .../output_variation/_mmlu_ov_01.yaml | 5 ++++ .../output_variation/_mmlu_ov_02.yaml | 5 ++++ .../output_variation/_mmlu_ov_03.yaml | 5 ++++ .../output_variation/_mmlu_ov_04.yaml | 5 ++++ .../output_variation/style_01/a/_mmlu.yaml | 6 +++++ .../style_01/a/_template_yaml | 11 ++++++++ .../style_01/a/mmlu_abstract_algebra.yaml | 6 +++++ .../style_01/a/mmlu_anatomy.yaml | 6 +++++ .../style_01/a/mmlu_astronomy.yaml | 6 +++++ .../style_01/a/mmlu_business_ethics.yaml | 6 +++++ .../style_01/a/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_01/a/mmlu_college_biology.yaml | 6 +++++ .../style_01/a/mmlu_college_chemistry.yaml | 6 +++++ .../a/mmlu_college_computer_science.yaml | 6 +++++ .../style_01/a/mmlu_college_mathematics.yaml | 6
+++++ .../style_01/a/mmlu_college_medicine.yaml | 6 +++++ .../style_01/a/mmlu_college_physics.yaml | 6 +++++ .../style_01/a/mmlu_computer_security.yaml | 6 +++++ .../style_01/a/mmlu_conceptual_physics.yaml | 6 +++++ .../style_01/a/mmlu_econometrics.yaml | 6 +++++ .../a/mmlu_electrical_engineering.yaml | 6 +++++ .../a/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_01/a/mmlu_formal_logic.yaml | 6 +++++ .../style_01/a/mmlu_global_facts.yaml | 6 +++++ .../style_01/a/mmlu_high_school_biology.yaml | 6 +++++ .../a/mmlu_high_school_chemistry.yaml | 6 +++++ .../a/mmlu_high_school_computer_science.yaml | 6 +++++ .../a/mmlu_high_school_european_history.yaml | 6 +++++ .../a/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../a/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../a/mmlu_high_school_mathematics.yaml | 6 +++++ .../a/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_01/a/mmlu_high_school_physics.yaml | 6 +++++ .../a/mmlu_high_school_psychology.yaml | 6 +++++ .../a/mmlu_high_school_statistics.yaml | 6 +++++ .../a/mmlu_high_school_us_history.yaml | 6 +++++ .../a/mmlu_high_school_world_history.yaml | 6 +++++ .../style_01/a/mmlu_human_aging.yaml | 6 +++++ .../style_01/a/mmlu_human_sexuality.yaml | 6 +++++ .../style_01/a/mmlu_international_law.yaml | 6 +++++ .../style_01/a/mmlu_jurisprudence.yaml | 6 +++++ .../style_01/a/mmlu_logical_fallacies.yaml | 6 +++++ .../style_01/a/mmlu_machine_learning.yaml | 6 +++++ .../style_01/a/mmlu_management.yaml | 6 +++++ .../style_01/a/mmlu_marketing.yaml | 6 +++++ .../style_01/a/mmlu_medical_genetics.yaml | 6 +++++ .../style_01/a/mmlu_miscellaneous.yaml | 6 +++++ .../style_01/a/mmlu_moral_disputes.yaml | 6 +++++ .../style_01/a/mmlu_moral_scenarios.yaml | 6 +++++ .../style_01/a/mmlu_nutrition.yaml | 6 +++++ .../style_01/a/mmlu_philosophy.yaml | 6 +++++ .../style_01/a/mmlu_prehistory.yaml | 6 +++++ .../a/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_01/a/mmlu_professional_law.yaml | 6 +++++ .../a/mmlu_professional_medicine.yaml | 6 +++++ .../a/mmlu_professional_psychology.yaml | 6 +++++ .../style_01/a/mmlu_public_relations.yaml | 6 +++++ .../style_01/a/mmlu_security_studies.yaml | 6 +++++ .../style_01/a/mmlu_sociology.yaml | 6 +++++ .../style_01/a/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_01/a/mmlu_virology.yaml | 6 +++++ .../style_01/a/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_01/b/_mmlu.yaml | 6 +++++ .../style_01/b/_template_yaml | 11 ++++++++ .../style_01/b/mmlu_abstract_algebra.yaml | 6 +++++ .../style_01/b/mmlu_anatomy.yaml | 6 +++++ .../style_01/b/mmlu_astronomy.yaml | 6 +++++ .../style_01/b/mmlu_business_ethics.yaml | 6 +++++ .../style_01/b/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_01/b/mmlu_college_biology.yaml | 6 +++++ .../style_01/b/mmlu_college_chemistry.yaml | 6 +++++ .../b/mmlu_college_computer_science.yaml | 6 +++++ .../style_01/b/mmlu_college_mathematics.yaml | 6 +++++ .../style_01/b/mmlu_college_medicine.yaml | 6 +++++ .../style_01/b/mmlu_college_physics.yaml | 6 +++++ .../style_01/b/mmlu_computer_security.yaml | 6 +++++ .../style_01/b/mmlu_conceptual_physics.yaml | 6 +++++ .../style_01/b/mmlu_econometrics.yaml | 6 +++++ .../b/mmlu_electrical_engineering.yaml | 6 +++++ .../b/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_01/b/mmlu_formal_logic.yaml | 6 +++++ .../style_01/b/mmlu_global_facts.yaml | 6 +++++ .../style_01/b/mmlu_high_school_biology.yaml | 6 +++++ .../b/mmlu_high_school_chemistry.yaml | 6 +++++ .../b/mmlu_high_school_computer_science.yaml | 6 +++++ .../b/mmlu_high_school_european_history.yaml | 6 +++++ .../b/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../b/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../b/mmlu_high_school_mathematics.yaml | 6 +++++ .../b/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_01/b/mmlu_high_school_physics.yaml | 6 +++++ 
.../b/mmlu_high_school_psychology.yaml | 6 +++++ .../b/mmlu_high_school_statistics.yaml | 6 +++++ .../b/mmlu_high_school_us_history.yaml | 6 +++++ .../b/mmlu_high_school_world_history.yaml | 6 +++++ .../style_01/b/mmlu_human_aging.yaml | 6 +++++ .../style_01/b/mmlu_human_sexuality.yaml | 6 +++++ .../style_01/b/mmlu_international_law.yaml | 6 +++++ .../style_01/b/mmlu_jurisprudence.yaml | 6 +++++ .../style_01/b/mmlu_logical_fallacies.yaml | 6 +++++ .../style_01/b/mmlu_machine_learning.yaml | 6 +++++ .../style_01/b/mmlu_management.yaml | 6 +++++ .../style_01/b/mmlu_marketing.yaml | 6 +++++ .../style_01/b/mmlu_medical_genetics.yaml | 6 +++++ .../style_01/b/mmlu_miscellaneous.yaml | 6 +++++ .../style_01/b/mmlu_moral_disputes.yaml | 6 +++++ .../style_01/b/mmlu_moral_scenarios.yaml | 6 +++++ .../style_01/b/mmlu_nutrition.yaml | 6 +++++ .../style_01/b/mmlu_philosophy.yaml | 6 +++++ .../style_01/b/mmlu_prehistory.yaml | 6 +++++ .../b/mmlu_professional_accounting.yaml | 6 +++++ .../style_01/b/mmlu_professional_law.yaml | 6 +++++ .../b/mmlu_professional_medicine.yaml | 6 +++++ .../b/mmlu_professional_psychology.yaml | 6 +++++ .../style_01/b/mmlu_public_relations.yaml | 6 +++++ .../style_01/b/mmlu_security_studies.yaml | 6 +++++ .../style_01/b/mmlu_sociology.yaml | 6 +++++ .../style_01/b/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_01/b/mmlu_virology.yaml | 6 +++++ .../style_01/b/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_01/c/_mmlu.yaml | 6 +++++ .../style_01/c/_template_yaml | 11 ++++++++ .../style_01/c/mmlu_abstract_algebra.yaml | 6 +++++ .../style_01/c/mmlu_anatomy.yaml | 6 +++++ .../style_01/c/mmlu_astronomy.yaml | 6 +++++ .../style_01/c/mmlu_business_ethics.yaml | 6 +++++ .../style_01/c/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_01/c/mmlu_college_biology.yaml | 6 +++++ .../style_01/c/mmlu_college_chemistry.yaml | 6 +++++ .../c/mmlu_college_computer_science.yaml | 6 +++++ .../style_01/c/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_01/c/mmlu_college_medicine.yaml | 6 +++++ .../style_01/c/mmlu_college_physics.yaml | 6 +++++ .../style_01/c/mmlu_computer_security.yaml | 6 +++++ .../style_01/c/mmlu_conceptual_physics.yaml | 6 +++++ .../style_01/c/mmlu_econometrics.yaml | 6 +++++ .../c/mmlu_electrical_engineering.yaml | 6 +++++ .../c/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_01/c/mmlu_formal_logic.yaml | 6 +++++ .../style_01/c/mmlu_global_facts.yaml | 6 +++++ .../style_01/c/mmlu_high_school_biology.yaml | 6 +++++ .../c/mmlu_high_school_chemistry.yaml | 6 +++++ .../c/mmlu_high_school_computer_science.yaml | 6 +++++ .../c/mmlu_high_school_european_history.yaml | 6 +++++ .../c/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../c/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../c/mmlu_high_school_mathematics.yaml | 6 +++++ .../c/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_01/c/mmlu_high_school_physics.yaml | 6 +++++ .../c/mmlu_high_school_psychology.yaml | 6 +++++ .../c/mmlu_high_school_statistics.yaml | 6 +++++ .../c/mmlu_high_school_us_history.yaml | 6 +++++ .../c/mmlu_high_school_world_history.yaml | 6 +++++ .../style_01/c/mmlu_human_aging.yaml | 6 +++++ .../style_01/c/mmlu_human_sexuality.yaml | 6 +++++ .../style_01/c/mmlu_international_law.yaml | 6 +++++ .../style_01/c/mmlu_jurisprudence.yaml | 6 +++++ .../style_01/c/mmlu_logical_fallacies.yaml | 6 +++++ .../style_01/c/mmlu_machine_learning.yaml | 6 +++++ .../style_01/c/mmlu_management.yaml | 6 +++++ .../style_01/c/mmlu_marketing.yaml | 6 +++++ .../style_01/c/mmlu_medical_genetics.yaml | 6 +++++ .../style_01/c/mmlu_miscellaneous.yaml | 6 +++++ .../style_01/c/mmlu_moral_disputes.yaml | 6 +++++ .../style_01/c/mmlu_moral_scenarios.yaml | 6 +++++ .../style_01/c/mmlu_nutrition.yaml | 6 +++++ .../style_01/c/mmlu_philosophy.yaml | 6 +++++ .../style_01/c/mmlu_prehistory.yaml | 6 +++++ .../c/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_01/c/mmlu_professional_law.yaml | 6 +++++ .../c/mmlu_professional_medicine.yaml | 6 +++++ .../c/mmlu_professional_psychology.yaml | 6 +++++ .../style_01/c/mmlu_public_relations.yaml | 6 +++++ .../style_01/c/mmlu_security_studies.yaml | 6 +++++ .../style_01/c/mmlu_sociology.yaml | 6 +++++ .../style_01/c/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_01/c/mmlu_virology.yaml | 6 +++++ .../style_01/c/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_02/a/_mmlu.yaml | 6 +++++ .../style_02/a/_template_yaml | 11 ++++++++ .../style_02/a/mmlu_abstract_algebra.yaml | 6 +++++ .../style_02/a/mmlu_anatomy.yaml | 6 +++++ .../style_02/a/mmlu_astronomy.yaml | 6 +++++ .../style_02/a/mmlu_business_ethics.yaml | 6 +++++ .../style_02/a/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_02/a/mmlu_college_biology.yaml | 6 +++++ .../style_02/a/mmlu_college_chemistry.yaml | 6 +++++ .../a/mmlu_college_computer_science.yaml | 6 +++++ .../style_02/a/mmlu_college_mathematics.yaml | 6 +++++ .../style_02/a/mmlu_college_medicine.yaml | 6 +++++ .../style_02/a/mmlu_college_physics.yaml | 6 +++++ .../style_02/a/mmlu_computer_security.yaml | 6 +++++ .../style_02/a/mmlu_conceptual_physics.yaml | 6 +++++ .../style_02/a/mmlu_econometrics.yaml | 6 +++++ .../a/mmlu_electrical_engineering.yaml | 6 +++++ .../a/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_02/a/mmlu_formal_logic.yaml | 6 +++++ .../style_02/a/mmlu_global_facts.yaml | 6 +++++ .../style_02/a/mmlu_high_school_biology.yaml | 6 +++++ .../a/mmlu_high_school_chemistry.yaml | 6 +++++ .../a/mmlu_high_school_computer_science.yaml | 6 +++++ .../a/mmlu_high_school_european_history.yaml | 6 +++++ .../a/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../a/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../a/mmlu_high_school_mathematics.yaml | 6 +++++ .../a/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_02/a/mmlu_high_school_physics.yaml | 6 +++++ 
.../a/mmlu_high_school_psychology.yaml | 6 +++++ .../a/mmlu_high_school_statistics.yaml | 6 +++++ .../a/mmlu_high_school_us_history.yaml | 6 +++++ .../a/mmlu_high_school_world_history.yaml | 6 +++++ .../style_02/a/mmlu_human_aging.yaml | 6 +++++ .../style_02/a/mmlu_human_sexuality.yaml | 6 +++++ .../style_02/a/mmlu_international_law.yaml | 6 +++++ .../style_02/a/mmlu_jurisprudence.yaml | 6 +++++ .../style_02/a/mmlu_logical_fallacies.yaml | 6 +++++ .../style_02/a/mmlu_machine_learning.yaml | 6 +++++ .../style_02/a/mmlu_management.yaml | 6 +++++ .../style_02/a/mmlu_marketing.yaml | 6 +++++ .../style_02/a/mmlu_medical_genetics.yaml | 6 +++++ .../style_02/a/mmlu_miscellaneous.yaml | 6 +++++ .../style_02/a/mmlu_moral_disputes.yaml | 6 +++++ .../style_02/a/mmlu_moral_scenarios.yaml | 6 +++++ .../style_02/a/mmlu_nutrition.yaml | 6 +++++ .../style_02/a/mmlu_philosophy.yaml | 6 +++++ .../style_02/a/mmlu_prehistory.yaml | 6 +++++ .../a/mmlu_professional_accounting.yaml | 6 +++++ .../style_02/a/mmlu_professional_law.yaml | 6 +++++ .../a/mmlu_professional_medicine.yaml | 6 +++++ .../a/mmlu_professional_psychology.yaml | 6 +++++ .../style_02/a/mmlu_public_relations.yaml | 6 +++++ .../style_02/a/mmlu_security_studies.yaml | 6 +++++ .../style_02/a/mmlu_sociology.yaml | 6 +++++ .../style_02/a/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_02/a/mmlu_virology.yaml | 6 +++++ .../style_02/a/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_02/b/_mmlu.yaml | 6 +++++ .../style_02/b/_template_yaml | 11 ++++++++ .../style_02/b/mmlu_abstract_algebra.yaml | 6 +++++ .../style_02/b/mmlu_anatomy.yaml | 6 +++++ .../style_02/b/mmlu_astronomy.yaml | 6 +++++ .../style_02/b/mmlu_business_ethics.yaml | 6 +++++ .../style_02/b/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_02/b/mmlu_college_biology.yaml | 6 +++++ .../style_02/b/mmlu_college_chemistry.yaml | 6 +++++ .../b/mmlu_college_computer_science.yaml | 6 +++++ .../style_02/b/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_02/b/mmlu_college_medicine.yaml | 6 +++++ .../style_02/b/mmlu_college_physics.yaml | 6 +++++ .../style_02/b/mmlu_computer_security.yaml | 6 +++++ .../style_02/b/mmlu_conceptual_physics.yaml | 6 +++++ .../style_02/b/mmlu_econometrics.yaml | 6 +++++ .../b/mmlu_electrical_engineering.yaml | 6 +++++ .../b/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_02/b/mmlu_formal_logic.yaml | 6 +++++ .../style_02/b/mmlu_global_facts.yaml | 6 +++++ .../style_02/b/mmlu_high_school_biology.yaml | 6 +++++ .../b/mmlu_high_school_chemistry.yaml | 6 +++++ .../b/mmlu_high_school_computer_science.yaml | 6 +++++ .../b/mmlu_high_school_european_history.yaml | 6 +++++ .../b/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../b/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../b/mmlu_high_school_mathematics.yaml | 6 +++++ .../b/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_02/b/mmlu_high_school_physics.yaml | 6 +++++ .../b/mmlu_high_school_psychology.yaml | 6 +++++ .../b/mmlu_high_school_statistics.yaml | 6 +++++ .../b/mmlu_high_school_us_history.yaml | 6 +++++ .../b/mmlu_high_school_world_history.yaml | 6 +++++ .../style_02/b/mmlu_human_aging.yaml | 6 +++++ .../style_02/b/mmlu_human_sexuality.yaml | 6 +++++ .../style_02/b/mmlu_international_law.yaml | 6 +++++ .../style_02/b/mmlu_jurisprudence.yaml | 6 +++++ .../style_02/b/mmlu_logical_fallacies.yaml | 6 +++++ .../style_02/b/mmlu_machine_learning.yaml | 6 +++++ .../style_02/b/mmlu_management.yaml | 6 +++++ .../style_02/b/mmlu_marketing.yaml | 6 +++++ .../style_02/b/mmlu_medical_genetics.yaml | 6 +++++ .../style_02/b/mmlu_miscellaneous.yaml | 6 +++++ .../style_02/b/mmlu_moral_disputes.yaml | 6 +++++ .../style_02/b/mmlu_moral_scenarios.yaml | 6 +++++ .../style_02/b/mmlu_nutrition.yaml | 6 +++++ .../style_02/b/mmlu_philosophy.yaml | 6 +++++ .../style_02/b/mmlu_prehistory.yaml | 6 +++++ .../b/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_02/b/mmlu_professional_law.yaml | 6 +++++ .../b/mmlu_professional_medicine.yaml | 6 +++++ .../b/mmlu_professional_psychology.yaml | 6 +++++ .../style_02/b/mmlu_public_relations.yaml | 6 +++++ .../style_02/b/mmlu_security_studies.yaml | 6 +++++ .../style_02/b/mmlu_sociology.yaml | 6 +++++ .../style_02/b/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_02/b/mmlu_virology.yaml | 6 +++++ .../style_02/b/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_02/c/_mmlu.yaml | 6 +++++ .../style_02/c/_template_yaml | 11 ++++++++ .../style_02/c/mmlu_abstract_algebra.yaml | 6 +++++ .../style_02/c/mmlu_anatomy.yaml | 6 +++++ .../style_02/c/mmlu_astronomy.yaml | 6 +++++ .../style_02/c/mmlu_business_ethics.yaml | 6 +++++ .../style_02/c/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_02/c/mmlu_college_biology.yaml | 6 +++++ .../style_02/c/mmlu_college_chemistry.yaml | 6 +++++ .../c/mmlu_college_computer_science.yaml | 6 +++++ .../style_02/c/mmlu_college_mathematics.yaml | 6 +++++ .../style_02/c/mmlu_college_medicine.yaml | 6 +++++ .../style_02/c/mmlu_college_physics.yaml | 6 +++++ .../style_02/c/mmlu_computer_security.yaml | 6 +++++ .../style_02/c/mmlu_conceptual_physics.yaml | 6 +++++ .../style_02/c/mmlu_econometrics.yaml | 6 +++++ .../c/mmlu_electrical_engineering.yaml | 6 +++++ .../c/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_02/c/mmlu_formal_logic.yaml | 6 +++++ .../style_02/c/mmlu_global_facts.yaml | 6 +++++ .../style_02/c/mmlu_high_school_biology.yaml | 6 +++++ .../c/mmlu_high_school_chemistry.yaml | 6 +++++ .../c/mmlu_high_school_computer_science.yaml | 6 +++++ .../c/mmlu_high_school_european_history.yaml | 6 +++++ .../c/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../c/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../c/mmlu_high_school_mathematics.yaml | 6 +++++ .../c/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_02/c/mmlu_high_school_physics.yaml | 6 +++++ 
.../c/mmlu_high_school_psychology.yaml | 6 +++++ .../c/mmlu_high_school_statistics.yaml | 6 +++++ .../c/mmlu_high_school_us_history.yaml | 6 +++++ .../c/mmlu_high_school_world_history.yaml | 6 +++++ .../style_02/c/mmlu_human_aging.yaml | 6 +++++ .../style_02/c/mmlu_human_sexuality.yaml | 6 +++++ .../style_02/c/mmlu_international_law.yaml | 6 +++++ .../style_02/c/mmlu_jurisprudence.yaml | 6 +++++ .../style_02/c/mmlu_logical_fallacies.yaml | 6 +++++ .../style_02/c/mmlu_machine_learning.yaml | 6 +++++ .../style_02/c/mmlu_management.yaml | 6 +++++ .../style_02/c/mmlu_marketing.yaml | 6 +++++ .../style_02/c/mmlu_medical_genetics.yaml | 6 +++++ .../style_02/c/mmlu_miscellaneous.yaml | 6 +++++ .../style_02/c/mmlu_moral_disputes.yaml | 6 +++++ .../style_02/c/mmlu_moral_scenarios.yaml | 6 +++++ .../style_02/c/mmlu_nutrition.yaml | 6 +++++ .../style_02/c/mmlu_philosophy.yaml | 6 +++++ .../style_02/c/mmlu_prehistory.yaml | 6 +++++ .../c/mmlu_professional_accounting.yaml | 6 +++++ .../style_02/c/mmlu_professional_law.yaml | 6 +++++ .../c/mmlu_professional_medicine.yaml | 6 +++++ .../c/mmlu_professional_psychology.yaml | 6 +++++ .../style_02/c/mmlu_public_relations.yaml | 6 +++++ .../style_02/c/mmlu_security_studies.yaml | 6 +++++ .../style_02/c/mmlu_sociology.yaml | 6 +++++ .../style_02/c/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_02/c/mmlu_virology.yaml | 6 +++++ .../style_02/c/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_03/a/_mmlu.yaml | 6 +++++ .../style_03/a/_template_yaml | 11 ++++++++ .../style_03/a/mmlu_abstract_algebra.yaml | 6 +++++ .../style_03/a/mmlu_anatomy.yaml | 6 +++++ .../style_03/a/mmlu_astronomy.yaml | 6 +++++ .../style_03/a/mmlu_business_ethics.yaml | 6 +++++ .../style_03/a/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_03/a/mmlu_college_biology.yaml | 6 +++++ .../style_03/a/mmlu_college_chemistry.yaml | 6 +++++ .../a/mmlu_college_computer_science.yaml | 6 +++++ .../style_03/a/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_03/a/mmlu_college_medicine.yaml | 6 +++++ .../style_03/a/mmlu_college_physics.yaml | 6 +++++ .../style_03/a/mmlu_computer_security.yaml | 6 +++++ .../style_03/a/mmlu_conceptual_physics.yaml | 6 +++++ .../style_03/a/mmlu_econometrics.yaml | 6 +++++ .../a/mmlu_electrical_engineering.yaml | 6 +++++ .../a/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_03/a/mmlu_formal_logic.yaml | 6 +++++ .../style_03/a/mmlu_global_facts.yaml | 6 +++++ .../style_03/a/mmlu_high_school_biology.yaml | 6 +++++ .../a/mmlu_high_school_chemistry.yaml | 6 +++++ .../a/mmlu_high_school_computer_science.yaml | 6 +++++ .../a/mmlu_high_school_european_history.yaml | 6 +++++ .../a/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../a/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../a/mmlu_high_school_mathematics.yaml | 6 +++++ .../a/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_03/a/mmlu_high_school_physics.yaml | 6 +++++ .../a/mmlu_high_school_psychology.yaml | 6 +++++ .../a/mmlu_high_school_statistics.yaml | 6 +++++ .../a/mmlu_high_school_us_history.yaml | 6 +++++ .../a/mmlu_high_school_world_history.yaml | 6 +++++ .../style_03/a/mmlu_human_aging.yaml | 6 +++++ .../style_03/a/mmlu_human_sexuality.yaml | 6 +++++ .../style_03/a/mmlu_international_law.yaml | 6 +++++ .../style_03/a/mmlu_jurisprudence.yaml | 6 +++++ .../style_03/a/mmlu_logical_fallacies.yaml | 6 +++++ .../style_03/a/mmlu_machine_learning.yaml | 6 +++++ .../style_03/a/mmlu_management.yaml | 6 +++++ .../style_03/a/mmlu_marketing.yaml | 6 +++++ .../style_03/a/mmlu_medical_genetics.yaml | 6 +++++ .../style_03/a/mmlu_miscellaneous.yaml | 6 +++++ .../style_03/a/mmlu_moral_disputes.yaml | 6 +++++ .../style_03/a/mmlu_moral_scenarios.yaml | 6 +++++ .../style_03/a/mmlu_nutrition.yaml | 6 +++++ .../style_03/a/mmlu_philosophy.yaml | 6 +++++ .../style_03/a/mmlu_prehistory.yaml | 6 +++++ .../a/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_03/a/mmlu_professional_law.yaml | 6 +++++ .../a/mmlu_professional_medicine.yaml | 6 +++++ .../a/mmlu_professional_psychology.yaml | 6 +++++ .../style_03/a/mmlu_public_relations.yaml | 6 +++++ .../style_03/a/mmlu_security_studies.yaml | 6 +++++ .../style_03/a/mmlu_sociology.yaml | 6 +++++ .../style_03/a/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_03/a/mmlu_virology.yaml | 6 +++++ .../style_03/a/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_03/b/_mmlu.yaml | 6 +++++ .../style_03/b/_template_yaml | 11 ++++++++ .../style_03/b/mmlu_abstract_algebra.yaml | 6 +++++ .../style_03/b/mmlu_anatomy.yaml | 6 +++++ .../style_03/b/mmlu_astronomy.yaml | 6 +++++ .../style_03/b/mmlu_business_ethics.yaml | 6 +++++ .../style_03/b/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_03/b/mmlu_college_biology.yaml | 6 +++++ .../style_03/b/mmlu_college_chemistry.yaml | 6 +++++ .../b/mmlu_college_computer_science.yaml | 6 +++++ .../style_03/b/mmlu_college_mathematics.yaml | 6 +++++ .../style_03/b/mmlu_college_medicine.yaml | 6 +++++ .../style_03/b/mmlu_college_physics.yaml | 6 +++++ .../style_03/b/mmlu_computer_security.yaml | 6 +++++ .../style_03/b/mmlu_conceptual_physics.yaml | 6 +++++ .../style_03/b/mmlu_econometrics.yaml | 6 +++++ .../b/mmlu_electrical_engineering.yaml | 6 +++++ .../b/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_03/b/mmlu_formal_logic.yaml | 6 +++++ .../style_03/b/mmlu_global_facts.yaml | 6 +++++ .../style_03/b/mmlu_high_school_biology.yaml | 6 +++++ .../b/mmlu_high_school_chemistry.yaml | 6 +++++ .../b/mmlu_high_school_computer_science.yaml | 6 +++++ .../b/mmlu_high_school_european_history.yaml | 6 +++++ .../b/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../b/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../b/mmlu_high_school_mathematics.yaml | 6 +++++ .../b/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_03/b/mmlu_high_school_physics.yaml | 6 +++++ 
.../b/mmlu_high_school_psychology.yaml | 6 +++++ .../b/mmlu_high_school_statistics.yaml | 6 +++++ .../b/mmlu_high_school_us_history.yaml | 6 +++++ .../b/mmlu_high_school_world_history.yaml | 6 +++++ .../style_03/b/mmlu_human_aging.yaml | 6 +++++ .../style_03/b/mmlu_human_sexuality.yaml | 6 +++++ .../style_03/b/mmlu_international_law.yaml | 6 +++++ .../style_03/b/mmlu_jurisprudence.yaml | 6 +++++ .../style_03/b/mmlu_logical_fallacies.yaml | 6 +++++ .../style_03/b/mmlu_machine_learning.yaml | 6 +++++ .../style_03/b/mmlu_management.yaml | 6 +++++ .../style_03/b/mmlu_marketing.yaml | 6 +++++ .../style_03/b/mmlu_medical_genetics.yaml | 6 +++++ .../style_03/b/mmlu_miscellaneous.yaml | 6 +++++ .../style_03/b/mmlu_moral_disputes.yaml | 6 +++++ .../style_03/b/mmlu_moral_scenarios.yaml | 6 +++++ .../style_03/b/mmlu_nutrition.yaml | 6 +++++ .../style_03/b/mmlu_philosophy.yaml | 6 +++++ .../style_03/b/mmlu_prehistory.yaml | 6 +++++ .../b/mmlu_professional_accounting.yaml | 6 +++++ .../style_03/b/mmlu_professional_law.yaml | 6 +++++ .../b/mmlu_professional_medicine.yaml | 6 +++++ .../b/mmlu_professional_psychology.yaml | 6 +++++ .../style_03/b/mmlu_public_relations.yaml | 6 +++++ .../style_03/b/mmlu_security_studies.yaml | 6 +++++ .../style_03/b/mmlu_sociology.yaml | 6 +++++ .../style_03/b/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_03/b/mmlu_virology.yaml | 6 +++++ .../style_03/b/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_03/c/_mmlu.yaml | 6 +++++ .../style_03/c/_template_yaml | 11 ++++++++ .../style_03/c/mmlu_abstract_algebra.yaml | 6 +++++ .../style_03/c/mmlu_anatomy.yaml | 6 +++++ .../style_03/c/mmlu_astronomy.yaml | 6 +++++ .../style_03/c/mmlu_business_ethics.yaml | 6 +++++ .../style_03/c/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_03/c/mmlu_college_biology.yaml | 6 +++++ .../style_03/c/mmlu_college_chemistry.yaml | 6 +++++ .../c/mmlu_college_computer_science.yaml | 6 +++++ .../style_03/c/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_03/c/mmlu_college_medicine.yaml | 6 +++++ .../style_03/c/mmlu_college_physics.yaml | 6 +++++ .../style_03/c/mmlu_computer_security.yaml | 6 +++++ .../style_03/c/mmlu_conceptual_physics.yaml | 6 +++++ .../style_03/c/mmlu_econometrics.yaml | 6 +++++ .../c/mmlu_electrical_engineering.yaml | 6 +++++ .../c/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_03/c/mmlu_formal_logic.yaml | 6 +++++ .../style_03/c/mmlu_global_facts.yaml | 6 +++++ .../style_03/c/mmlu_high_school_biology.yaml | 6 +++++ .../c/mmlu_high_school_chemistry.yaml | 6 +++++ .../c/mmlu_high_school_computer_science.yaml | 6 +++++ .../c/mmlu_high_school_european_history.yaml | 6 +++++ .../c/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../c/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../c/mmlu_high_school_mathematics.yaml | 6 +++++ .../c/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_03/c/mmlu_high_school_physics.yaml | 6 +++++ .../c/mmlu_high_school_psychology.yaml | 6 +++++ .../c/mmlu_high_school_statistics.yaml | 6 +++++ .../c/mmlu_high_school_us_history.yaml | 6 +++++ .../c/mmlu_high_school_world_history.yaml | 6 +++++ .../style_03/c/mmlu_human_aging.yaml | 6 +++++ .../style_03/c/mmlu_human_sexuality.yaml | 6 +++++ .../style_03/c/mmlu_international_law.yaml | 6 +++++ .../style_03/c/mmlu_jurisprudence.yaml | 6 +++++ .../style_03/c/mmlu_logical_fallacies.yaml | 6 +++++ .../style_03/c/mmlu_machine_learning.yaml | 6 +++++ .../style_03/c/mmlu_management.yaml | 6 +++++ .../style_03/c/mmlu_marketing.yaml | 6 +++++ .../style_03/c/mmlu_medical_genetics.yaml | 6 +++++ .../style_03/c/mmlu_miscellaneous.yaml | 6 +++++ .../style_03/c/mmlu_moral_disputes.yaml | 6 +++++ .../style_03/c/mmlu_moral_scenarios.yaml | 6 +++++ .../style_03/c/mmlu_nutrition.yaml | 6 +++++ .../style_03/c/mmlu_philosophy.yaml | 6 +++++ .../style_03/c/mmlu_prehistory.yaml | 6 +++++ .../c/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_03/c/mmlu_professional_law.yaml | 6 +++++ .../c/mmlu_professional_medicine.yaml | 6 +++++ .../c/mmlu_professional_psychology.yaml | 6 +++++ .../style_03/c/mmlu_public_relations.yaml | 6 +++++ .../style_03/c/mmlu_security_studies.yaml | 6 +++++ .../style_03/c/mmlu_sociology.yaml | 6 +++++ .../style_03/c/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_03/c/mmlu_virology.yaml | 6 +++++ .../style_03/c/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_04/a/_mmlu.yaml | 6 +++++ .../style_04/a/_template_yaml | 11 ++++++++ .../style_04/a/mmlu_abstract_algebra.yaml | 6 +++++ .../style_04/a/mmlu_anatomy.yaml | 6 +++++ .../style_04/a/mmlu_astronomy.yaml | 6 +++++ .../style_04/a/mmlu_business_ethics.yaml | 6 +++++ .../style_04/a/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_04/a/mmlu_college_biology.yaml | 6 +++++ .../style_04/a/mmlu_college_chemistry.yaml | 6 +++++ .../a/mmlu_college_computer_science.yaml | 6 +++++ .../style_04/a/mmlu_college_mathematics.yaml | 6 +++++ .../style_04/a/mmlu_college_medicine.yaml | 6 +++++ .../style_04/a/mmlu_college_physics.yaml | 6 +++++ .../style_04/a/mmlu_computer_security.yaml | 6 +++++ .../style_04/a/mmlu_conceptual_physics.yaml | 6 +++++ .../style_04/a/mmlu_econometrics.yaml | 6 +++++ .../a/mmlu_electrical_engineering.yaml | 6 +++++ .../a/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_04/a/mmlu_formal_logic.yaml | 6 +++++ .../style_04/a/mmlu_global_facts.yaml | 6 +++++ .../style_04/a/mmlu_high_school_biology.yaml | 6 +++++ .../a/mmlu_high_school_chemistry.yaml | 6 +++++ .../a/mmlu_high_school_computer_science.yaml | 6 +++++ .../a/mmlu_high_school_european_history.yaml | 6 +++++ .../a/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../a/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../a/mmlu_high_school_mathematics.yaml | 6 +++++ .../a/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_04/a/mmlu_high_school_physics.yaml | 6 +++++ 
.../a/mmlu_high_school_psychology.yaml | 6 +++++ .../a/mmlu_high_school_statistics.yaml | 6 +++++ .../a/mmlu_high_school_us_history.yaml | 6 +++++ .../a/mmlu_high_school_world_history.yaml | 6 +++++ .../style_04/a/mmlu_human_aging.yaml | 6 +++++ .../style_04/a/mmlu_human_sexuality.yaml | 6 +++++ .../style_04/a/mmlu_international_law.yaml | 6 +++++ .../style_04/a/mmlu_jurisprudence.yaml | 6 +++++ .../style_04/a/mmlu_logical_fallacies.yaml | 6 +++++ .../style_04/a/mmlu_machine_learning.yaml | 6 +++++ .../style_04/a/mmlu_management.yaml | 6 +++++ .../style_04/a/mmlu_marketing.yaml | 6 +++++ .../style_04/a/mmlu_medical_genetics.yaml | 6 +++++ .../style_04/a/mmlu_miscellaneous.yaml | 6 +++++ .../style_04/a/mmlu_moral_disputes.yaml | 6 +++++ .../style_04/a/mmlu_moral_scenarios.yaml | 6 +++++ .../style_04/a/mmlu_nutrition.yaml | 6 +++++ .../style_04/a/mmlu_philosophy.yaml | 6 +++++ .../style_04/a/mmlu_prehistory.yaml | 6 +++++ .../a/mmlu_professional_accounting.yaml | 6 +++++ .../style_04/a/mmlu_professional_law.yaml | 6 +++++ .../a/mmlu_professional_medicine.yaml | 6 +++++ .../a/mmlu_professional_psychology.yaml | 6 +++++ .../style_04/a/mmlu_public_relations.yaml | 6 +++++ .../style_04/a/mmlu_security_studies.yaml | 6 +++++ .../style_04/a/mmlu_sociology.yaml | 6 +++++ .../style_04/a/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_04/a/mmlu_virology.yaml | 6 +++++ .../style_04/a/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_04/b/_mmlu.yaml | 6 +++++ .../style_04/b/_template_yaml | 11 ++++++++ .../style_04/b/mmlu_abstract_algebra.yaml | 6 +++++ .../style_04/b/mmlu_anatomy.yaml | 6 +++++ .../style_04/b/mmlu_astronomy.yaml | 6 +++++ .../style_04/b/mmlu_business_ethics.yaml | 6 +++++ .../style_04/b/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_04/b/mmlu_college_biology.yaml | 6 +++++ .../style_04/b/mmlu_college_chemistry.yaml | 6 +++++ .../b/mmlu_college_computer_science.yaml | 6 +++++ .../style_04/b/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_04/b/mmlu_college_medicine.yaml | 6 +++++ .../style_04/b/mmlu_college_physics.yaml | 6 +++++ .../style_04/b/mmlu_computer_security.yaml | 6 +++++ .../style_04/b/mmlu_conceptual_physics.yaml | 6 +++++ .../style_04/b/mmlu_econometrics.yaml | 6 +++++ .../b/mmlu_electrical_engineering.yaml | 6 +++++ .../b/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_04/b/mmlu_formal_logic.yaml | 6 +++++ .../style_04/b/mmlu_global_facts.yaml | 6 +++++ .../style_04/b/mmlu_high_school_biology.yaml | 6 +++++ .../b/mmlu_high_school_chemistry.yaml | 6 +++++ .../b/mmlu_high_school_computer_science.yaml | 6 +++++ .../b/mmlu_high_school_european_history.yaml | 6 +++++ .../b/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../b/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../b/mmlu_high_school_mathematics.yaml | 6 +++++ .../b/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_04/b/mmlu_high_school_physics.yaml | 6 +++++ .../b/mmlu_high_school_psychology.yaml | 6 +++++ .../b/mmlu_high_school_statistics.yaml | 6 +++++ .../b/mmlu_high_school_us_history.yaml | 6 +++++ .../b/mmlu_high_school_world_history.yaml | 6 +++++ .../style_04/b/mmlu_human_aging.yaml | 6 +++++ .../style_04/b/mmlu_human_sexuality.yaml | 6 +++++ .../style_04/b/mmlu_international_law.yaml | 6 +++++ .../style_04/b/mmlu_jurisprudence.yaml | 6 +++++ .../style_04/b/mmlu_logical_fallacies.yaml | 6 +++++ .../style_04/b/mmlu_machine_learning.yaml | 6 +++++ .../style_04/b/mmlu_management.yaml | 6 +++++ .../style_04/b/mmlu_marketing.yaml | 6 +++++ .../style_04/b/mmlu_medical_genetics.yaml | 6 +++++ .../style_04/b/mmlu_miscellaneous.yaml | 6 +++++ .../style_04/b/mmlu_moral_disputes.yaml | 6 +++++ .../style_04/b/mmlu_moral_scenarios.yaml | 6 +++++ .../style_04/b/mmlu_nutrition.yaml | 6 +++++ .../style_04/b/mmlu_philosophy.yaml | 6 +++++ .../style_04/b/mmlu_prehistory.yaml | 6 +++++ .../b/mmlu_professional_accounting.yaml | 6 +++++ 
.../style_04/b/mmlu_professional_law.yaml | 6 +++++ .../b/mmlu_professional_medicine.yaml | 6 +++++ .../b/mmlu_professional_psychology.yaml | 6 +++++ .../style_04/b/mmlu_public_relations.yaml | 6 +++++ .../style_04/b/mmlu_security_studies.yaml | 6 +++++ .../style_04/b/mmlu_sociology.yaml | 6 +++++ .../style_04/b/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_04/b/mmlu_virology.yaml | 6 +++++ .../style_04/b/mmlu_world_religions.yaml | 6 +++++ .../output_variation/style_04/c/_mmlu.yaml | 6 +++++ .../style_04/c/_template_yaml | 11 ++++++++ .../style_04/c/mmlu_abstract_algebra.yaml | 6 +++++ .../style_04/c/mmlu_anatomy.yaml | 6 +++++ .../style_04/c/mmlu_astronomy.yaml | 6 +++++ .../style_04/c/mmlu_business_ethics.yaml | 6 +++++ .../style_04/c/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_04/c/mmlu_college_biology.yaml | 6 +++++ .../style_04/c/mmlu_college_chemistry.yaml | 6 +++++ .../c/mmlu_college_computer_science.yaml | 6 +++++ .../style_04/c/mmlu_college_mathematics.yaml | 6 +++++ .../style_04/c/mmlu_college_medicine.yaml | 6 +++++ .../style_04/c/mmlu_college_physics.yaml | 6 +++++ .../style_04/c/mmlu_computer_security.yaml | 6 +++++ .../style_04/c/mmlu_conceptual_physics.yaml | 6 +++++ .../style_04/c/mmlu_econometrics.yaml | 6 +++++ .../c/mmlu_electrical_engineering.yaml | 6 +++++ .../c/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_04/c/mmlu_formal_logic.yaml | 6 +++++ .../style_04/c/mmlu_global_facts.yaml | 6 +++++ .../style_04/c/mmlu_high_school_biology.yaml | 6 +++++ .../c/mmlu_high_school_chemistry.yaml | 6 +++++ .../c/mmlu_high_school_computer_science.yaml | 6 +++++ .../c/mmlu_high_school_european_history.yaml | 6 +++++ .../c/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../c/mmlu_high_school_macroeconomics.yaml | 6 +++++ .../c/mmlu_high_school_mathematics.yaml | 6 +++++ .../c/mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_04/c/mmlu_high_school_physics.yaml | 6 +++++ 
.../c/mmlu_high_school_psychology.yaml | 6 +++++ .../c/mmlu_high_school_statistics.yaml | 6 +++++ .../c/mmlu_high_school_us_history.yaml | 6 +++++ .../c/mmlu_high_school_world_history.yaml | 6 +++++ .../style_04/c/mmlu_human_aging.yaml | 6 +++++ .../style_04/c/mmlu_human_sexuality.yaml | 6 +++++ .../style_04/c/mmlu_international_law.yaml | 6 +++++ .../style_04/c/mmlu_jurisprudence.yaml | 6 +++++ .../style_04/c/mmlu_logical_fallacies.yaml | 6 +++++ .../style_04/c/mmlu_machine_learning.yaml | 6 +++++ .../style_04/c/mmlu_management.yaml | 6 +++++ .../style_04/c/mmlu_marketing.yaml | 6 +++++ .../style_04/c/mmlu_medical_genetics.yaml | 6 +++++ .../style_04/c/mmlu_miscellaneous.yaml | 6 +++++ .../style_04/c/mmlu_moral_disputes.yaml | 6 +++++ .../style_04/c/mmlu_moral_scenarios.yaml | 6 +++++ .../style_04/c/mmlu_nutrition.yaml | 6 +++++ .../style_04/c/mmlu_philosophy.yaml | 6 +++++ .../style_04/c/mmlu_prehistory.yaml | 6 +++++ .../c/mmlu_professional_accounting.yaml | 6 +++++ .../style_04/c/mmlu_professional_law.yaml | 6 +++++ .../c/mmlu_professional_medicine.yaml | 6 +++++ .../c/mmlu_professional_psychology.yaml | 6 +++++ .../style_04/c/mmlu_public_relations.yaml | 6 +++++ .../style_04/c/mmlu_security_studies.yaml | 6 +++++ .../style_04/c/mmlu_sociology.yaml | 6 +++++ .../style_04/c/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_04/c/mmlu_virology.yaml | 6 +++++ .../style_04/c/mmlu_world_religions.yaml | 6 +++++ .../style_01/_default_template_yaml | 13 ++++++++++ .../prompt_variation/style_01/_mmlu.yaml | 6 +++++ .../style_01/mmlu_abstract_algebra.yaml | 6 +++++ .../style_01/mmlu_anatomy.yaml | 6 +++++ .../style_01/mmlu_astronomy.yaml | 6 +++++ .../style_01/mmlu_business_ethics.yaml | 6 +++++ .../style_01/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_01/mmlu_college_biology.yaml | 6 +++++ .../style_01/mmlu_college_chemistry.yaml | 6 +++++ .../mmlu_college_computer_science.yaml | 6 +++++ .../style_01/mmlu_college_mathematics.yaml | 6 +++++ 
.../style_01/mmlu_college_medicine.yaml | 6 +++++ .../style_01/mmlu_college_physics.yaml | 6 +++++ .../style_01/mmlu_computer_security.yaml | 6 +++++ .../style_01/mmlu_conceptual_physics.yaml | 6 +++++ .../style_01/mmlu_econometrics.yaml | 6 +++++ .../style_01/mmlu_electrical_engineering.yaml | 6 +++++ .../style_01/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_01/mmlu_formal_logic.yaml | 6 +++++ .../style_01/mmlu_global_facts.yaml | 6 +++++ .../style_01/mmlu_high_school_biology.yaml | 6 +++++ .../style_01/mmlu_high_school_chemistry.yaml | 6 +++++ .../mmlu_high_school_computer_science.yaml | 6 +++++ .../mmlu_high_school_european_history.yaml | 6 +++++ .../style_01/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../mmlu_high_school_macroeconomics.yaml | 6 +++++ .../mmlu_high_school_mathematics.yaml | 6 +++++ .../mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_01/mmlu_high_school_physics.yaml | 6 +++++ .../style_01/mmlu_high_school_psychology.yaml | 6 +++++ .../style_01/mmlu_high_school_statistics.yaml | 6 +++++ .../style_01/mmlu_high_school_us_history.yaml | 6 +++++ .../mmlu_high_school_world_history.yaml | 6 +++++ .../style_01/mmlu_human_aging.yaml | 6 +++++ .../style_01/mmlu_human_sexuality.yaml | 6 +++++ .../style_01/mmlu_international_law.yaml | 6 +++++ .../style_01/mmlu_jurisprudence.yaml | 6 +++++ .../style_01/mmlu_logical_fallacies.yaml | 6 +++++ .../style_01/mmlu_machine_learning.yaml | 6 +++++ .../style_01/mmlu_management.yaml | 6 +++++ .../style_01/mmlu_marketing.yaml | 6 +++++ .../style_01/mmlu_medical_genetics.yaml | 6 +++++ .../style_01/mmlu_miscellaneous.yaml | 6 +++++ .../style_01/mmlu_moral_disputes.yaml | 6 +++++ .../style_01/mmlu_moral_scenarios.yaml | 6 +++++ .../style_01/mmlu_nutrition.yaml | 6 +++++ .../style_01/mmlu_philosophy.yaml | 6 +++++ .../style_01/mmlu_prehistory.yaml | 6 +++++ .../mmlu_professional_accounting.yaml | 6 +++++ .../style_01/mmlu_professional_law.yaml | 6 
+++++ .../style_01/mmlu_professional_medicine.yaml | 6 +++++ .../mmlu_professional_psychology.yaml | 6 +++++ .../style_01/mmlu_public_relations.yaml | 6 +++++ .../style_01/mmlu_security_studies.yaml | 6 +++++ .../style_01/mmlu_sociology.yaml | 6 +++++ .../style_01/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_01/mmlu_virology.yaml | 6 +++++ .../style_01/mmlu_world_religions.yaml | 6 +++++ .../style_02/_default_template_yaml | 13 ++++++++++ .../prompt_variation/style_02/_mmlu.yaml | 6 +++++ .../style_02/mmlu_abstract_algebra.yaml | 6 +++++ .../style_02/mmlu_anatomy.yaml | 6 +++++ .../style_02/mmlu_astronomy.yaml | 6 +++++ .../style_02/mmlu_business_ethics.yaml | 6 +++++ .../style_02/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_02/mmlu_college_biology.yaml | 6 +++++ .../style_02/mmlu_college_chemistry.yaml | 6 +++++ .../mmlu_college_computer_science.yaml | 6 +++++ .../style_02/mmlu_college_mathematics.yaml | 6 +++++ .../style_02/mmlu_college_medicine.yaml | 6 +++++ .../style_02/mmlu_college_physics.yaml | 6 +++++ .../style_02/mmlu_computer_security.yaml | 6 +++++ .../style_02/mmlu_conceptual_physics.yaml | 6 +++++ .../style_02/mmlu_econometrics.yaml | 6 +++++ .../style_02/mmlu_electrical_engineering.yaml | 6 +++++ .../style_02/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_02/mmlu_formal_logic.yaml | 6 +++++ .../style_02/mmlu_global_facts.yaml | 6 +++++ .../style_02/mmlu_high_school_biology.yaml | 6 +++++ .../style_02/mmlu_high_school_chemistry.yaml | 6 +++++ .../mmlu_high_school_computer_science.yaml | 6 +++++ .../mmlu_high_school_european_history.yaml | 6 +++++ .../style_02/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../mmlu_high_school_macroeconomics.yaml | 6 +++++ .../mmlu_high_school_mathematics.yaml | 6 +++++ .../mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_02/mmlu_high_school_physics.yaml | 6 +++++ .../style_02/mmlu_high_school_psychology.yaml | 6 +++++ 
.../style_02/mmlu_high_school_statistics.yaml | 6 +++++ .../style_02/mmlu_high_school_us_history.yaml | 6 +++++ .../mmlu_high_school_world_history.yaml | 6 +++++ .../style_02/mmlu_human_aging.yaml | 6 +++++ .../style_02/mmlu_human_sexuality.yaml | 6 +++++ .../style_02/mmlu_international_law.yaml | 6 +++++ .../style_02/mmlu_jurisprudence.yaml | 6 +++++ .../style_02/mmlu_logical_fallacies.yaml | 6 +++++ .../style_02/mmlu_machine_learning.yaml | 6 +++++ .../style_02/mmlu_management.yaml | 6 +++++ .../style_02/mmlu_marketing.yaml | 6 +++++ .../style_02/mmlu_medical_genetics.yaml | 6 +++++ .../style_02/mmlu_miscellaneous.yaml | 6 +++++ .../style_02/mmlu_moral_disputes.yaml | 6 +++++ .../style_02/mmlu_moral_scenarios.yaml | 6 +++++ .../style_02/mmlu_nutrition.yaml | 6 +++++ .../style_02/mmlu_philosophy.yaml | 6 +++++ .../style_02/mmlu_prehistory.yaml | 6 +++++ .../mmlu_professional_accounting.yaml | 6 +++++ .../style_02/mmlu_professional_law.yaml | 6 +++++ .../style_02/mmlu_professional_medicine.yaml | 6 +++++ .../mmlu_professional_psychology.yaml | 6 +++++ .../style_02/mmlu_public_relations.yaml | 6 +++++ .../style_02/mmlu_security_studies.yaml | 6 +++++ .../style_02/mmlu_sociology.yaml | 6 +++++ .../style_02/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_02/mmlu_virology.yaml | 6 +++++ .../style_02/mmlu_world_religions.yaml | 6 +++++ .../style_03/_default_template_yaml | 13 ++++++++++ .../prompt_variation/style_03/_mmlu.yaml | 6 +++++ .../style_03/mmlu_abstract_algebra.yaml | 6 +++++ .../style_03/mmlu_anatomy.yaml | 6 +++++ .../style_03/mmlu_astronomy.yaml | 6 +++++ .../style_03/mmlu_business_ethics.yaml | 6 +++++ .../style_03/mmlu_clinical_knowledge.yaml | 6 +++++ .../style_03/mmlu_college_biology.yaml | 6 +++++ .../style_03/mmlu_college_chemistry.yaml | 6 +++++ .../mmlu_college_computer_science.yaml | 6 +++++ .../style_03/mmlu_college_mathematics.yaml | 6 +++++ .../style_03/mmlu_college_medicine.yaml | 6 +++++ .../style_03/mmlu_college_physics.yaml | 6 +++++ 
.../style_03/mmlu_computer_security.yaml | 6 +++++ .../style_03/mmlu_conceptual_physics.yaml | 6 +++++ .../style_03/mmlu_econometrics.yaml | 6 +++++ .../style_03/mmlu_electrical_engineering.yaml | 6 +++++ .../style_03/mmlu_elementary_mathematics.yaml | 6 +++++ .../style_03/mmlu_formal_logic.yaml | 6 +++++ .../style_03/mmlu_global_facts.yaml | 6 +++++ .../style_03/mmlu_high_school_biology.yaml | 6 +++++ .../style_03/mmlu_high_school_chemistry.yaml | 6 +++++ .../mmlu_high_school_computer_science.yaml | 6 +++++ .../mmlu_high_school_european_history.yaml | 6 +++++ .../style_03/mmlu_high_school_geography.yaml | 6 +++++ ...u_high_school_government_and_politics.yaml | 6 +++++ .../mmlu_high_school_macroeconomics.yaml | 6 +++++ .../mmlu_high_school_mathematics.yaml | 6 +++++ .../mmlu_high_school_microeconomics.yaml | 6 +++++ .../style_03/mmlu_high_school_physics.yaml | 6 +++++ .../style_03/mmlu_high_school_psychology.yaml | 6 +++++ .../style_03/mmlu_high_school_statistics.yaml | 6 +++++ .../style_03/mmlu_high_school_us_history.yaml | 6 +++++ .../mmlu_high_school_world_history.yaml | 6 +++++ .../style_03/mmlu_human_aging.yaml | 6 +++++ .../style_03/mmlu_human_sexuality.yaml | 6 +++++ .../style_03/mmlu_international_law.yaml | 6 +++++ .../style_03/mmlu_jurisprudence.yaml | 6 +++++ .../style_03/mmlu_logical_fallacies.yaml | 6 +++++ .../style_03/mmlu_machine_learning.yaml | 6 +++++ .../style_03/mmlu_management.yaml | 6 +++++ .../style_03/mmlu_marketing.yaml | 6 +++++ .../style_03/mmlu_medical_genetics.yaml | 6 +++++ .../style_03/mmlu_miscellaneous.yaml | 6 +++++ .../style_03/mmlu_moral_disputes.yaml | 6 +++++ .../style_03/mmlu_moral_scenarios.yaml | 6 +++++ .../style_03/mmlu_nutrition.yaml | 6 +++++ .../style_03/mmlu_philosophy.yaml | 6 +++++ .../style_03/mmlu_prehistory.yaml | 6 +++++ .../mmlu_professional_accounting.yaml | 6 +++++ .../style_03/mmlu_professional_law.yaml | 6 +++++ .../style_03/mmlu_professional_medicine.yaml | 6 +++++ .../mmlu_professional_psychology.yaml | 6 
+++++ .../style_03/mmlu_public_relations.yaml | 6 +++++ .../style_03/mmlu_security_studies.yaml | 6 +++++ .../style_03/mmlu_sociology.yaml | 6 +++++ .../style_03/mmlu_us_foreign_policy.yaml | 6 +++++ .../style_03/mmlu_virology.yaml | 6 +++++ .../style_03/mmlu_world_religions.yaml | 6 +++++ 891 files changed, 5455 insertions(+) create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml new file mode 100644 index 00000000..29cd1544 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml @@ -0,0 +1,26 @@ +group: mmlu_alt_ov +task: + - mmlu_alt_ov_01a + - mmlu_alt_ov_01b + - mmlu_alt_ov_01c + - mmlu_alt_ov_02a + - mmlu_alt_ov_02b + - mmlu_alt_ov_02c + - mmlu_alt_ov_03a + - mmlu_alt_ov_03b + - 
mmlu_alt_ov_03c + - mmlu_alt_ov_04a + - mmlu_alt_ov_04b + - mmlu_alt_ov_04c + - mmlu_alt_ov_05a + - mmlu_alt_ov_05b + - mmlu_alt_ov_05c + - mmlu_alt_ov_06a + - mmlu_alt_ov_06b + - mmlu_alt_ov_06c + - mmlu_alt_ov_07a + - mmlu_alt_ov_07b + - mmlu_alt_ov_07c + - mmlu_alt_ov_08a + - mmlu_alt_ov_08b + - mmlu_alt_ov_08c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml new file mode 100644 index 00000000..65752917 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml @@ -0,0 +1,18 @@ +group: mmlu_alt_pv +task: + - mmlu_alt_pv_01 + - mmlu_alt_pv_02 + - mmlu_alt_pv_03 + # - mmlu_alt_pv_01_stem + # - mmlu_alt_pv_01_other + # - mmlu_alt_pv_01_social_sciences + # - mmlu_alt_pv_01_humanities + # - mmlu_alt_pv_02_stem + # - mmlu_alt_pv_02_other + # - mmlu_alt_pv_02_social_sciences + # - mmlu_alt_pv_02_humanities + # - mmlu_alt_pv_03_stem + # - mmlu_alt_pv_03_other + # - mmlu_alt_pv_03_social_sciences + # - mmlu_alt_pv_03_humanities + diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml new file mode 100644 index 00000000..80dc2866 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_01 +task: + - mmlu_alt_ov_01a + - mmlu_alt_ov_01b + - mmlu_alt_ov_01c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml new file mode 100644 index 00000000..b7bb3acc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_02 +task: + - mmlu_alt_ov_02a + - mmlu_alt_ov_02b + - mmlu_alt_ov_02c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml new file mode 100644 index 00000000..92c61b2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_03 +task: + - mmlu_alt_ov_03a + - mmlu_alt_ov_03b + - mmlu_alt_ov_03c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml new file mode 100644 index 00000000..6dbe0e4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_04 +task: + - mmlu_alt_ov_04a + - mmlu_alt_ov_04b + - mmlu_alt_ov_04c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml new file mode 100644 index 00000000..2839307e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01a +task: + - mmlu_alt_ov_01a_stem + - mmlu_alt_ov_01a_other + - mmlu_alt_ov_01a_social_sciences + - mmlu_alt_ov_01a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml new file mode 100644 index 00000000..ffd6d617 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..8b6d983d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..8179c42e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..d81a6206 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..b435f2d1 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..397f1a22 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..b56ece83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..200619e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 
@@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..b3c603c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..0ee8bff0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..4edd910e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..6f57ec94 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..1af0a156 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..6b135195 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ 
physics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..9ebd1493 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..1aca0f3e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..027d0eb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..ec8595a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..dd4cbb42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..23a6c360 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..64224c37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..5a5766ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..48d73dbd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_01a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..bcce14e7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..be05b188 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..c1e46832 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with 
answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..19526617 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..6d6e1236 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..258b3a99 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The 
following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..975a9128 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..47e394d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..034e88e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The 
following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..67aa7e65 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..d165939c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..7f6db037 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + 
\ sexuality.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml new file mode 100644 index 00000000..96c4c5bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..6a64fd5f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..c56f545c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_01a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..5624ee9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml new file mode 100644 index 00000000..23cd77ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml new file mode 100644 index 00000000..8f16d837 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..36b8beeb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..14d35704 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..2ffc9177 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..08de040c 
--- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..e3729066 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..446ae69e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..e918b8bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple 
choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..d88699a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..4720ea2d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..94c23c29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + 
\ medicine.\n\n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..0eba1445 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..638362e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..d14b4ef4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": 
"mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml new file mode 100644 index 00000000..41969f6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..02380c19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml new file mode 100644 index 00000000..f077a31a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_virology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..c043a278 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml new file mode 100644 index 00000000..98de9bfe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01b +task: + - mmlu_alt_ov_01b_stem + - mmlu_alt_ov_01b_other + - mmlu_alt_ov_01b_social_sciences + - mmlu_alt_ov_01b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml new file mode 100644 index 00000000..3fc9f640 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..c729cd5d --- 
/dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..1f0fdcc0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..52479b69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..a6b2529c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice 
questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..48d18e21 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..67be6bde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..b5a2b281 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..8e27b86f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..adc7363e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..10c82eca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_01b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..af4b7ac1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..e261a81f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..b18aff6b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_conceptual_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..8b27fcbc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..5b6b77db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..b388c142 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..380b2c72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..666586e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..7c3c0284 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..7c456554 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..ce932c88 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..e74ac6fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_european_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..dad2dad4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2e646650 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..3b648898 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": 
"mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..2d396ff6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..ed595897 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..9e15d293 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + 
\ school physics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..c2ad6c7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..f9e7e8e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..eb383e71 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ 
school us history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..c5942ac0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..34b171b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..890dc96a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml new file mode 100644 index 00000000..915a2e9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..61824c8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..393e1e74 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..12bd3d36 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml new file mode 100644 index 00000000..36c9e6cc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml new file mode 100644 index 00000000..8578bb86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..55304f5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..6fb4b8dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..5bf805a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..322a98d5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..0f3553b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..badf51ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..eec54ea0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..bed584f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..574f13ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..3913e4b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..eb612a97 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..36e179e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..c907ec26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml new file mode 100644 index 00000000..0045baee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..5fa5b959 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml new file mode 100644 index 00000000..f045210b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..d252c730 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml new file mode 100644 index 00000000..e3a06f31 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_01c +task: + - mmlu_alt_ov_01c_stem + - mmlu_alt_ov_01c_other + - mmlu_alt_ov_01c_social_sciences + - mmlu_alt_ov_01c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml new file mode 100644 index 00000000..aa68959a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_01 +doc_to_choice: !function ../../../styles.choice_01c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..f39778db --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..b3df09f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..2ab730aa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..aaf6bce4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..26c029b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..e7ed41b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..d3c11799 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_01c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..65b4db34 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..eaa9f427 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..6e4c6877 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..0f30cc94 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..4ba1c770 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..1a0fac6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..16953f87 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..a24deb27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..78f00f57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..b96a0780 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..06c6be19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..5a54c7e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..8e30a672 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..2af74be5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..268121e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..120e737f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..db0fa59d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..979ff8b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_01c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..186799f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..1b6fcc67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..d733408b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..0005c6cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..262a2ae4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..53d3bb67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..ccf074ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..5365955d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..fb2c8df3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml new file mode 100644 index 00000000..a8d94795 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..d2eebff2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..20e3d93f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..c2af3d02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_01c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml new file mode 100644 index 00000000..44d477cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml new file mode 100644 index 00000000..424f941e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..540aba6b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..d76c9116 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..d3674820 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..eeb089c8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..ed8cd5bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..c40e4600 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..9e48d0a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..89660aae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..4fa85794 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..a59797c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..b7270adb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..5e830d81 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..13d8a3ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml new file mode 100644 index 00000000..20259753 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..ef314a4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_01c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml new file mode 100644 index 00000000..8f4d3c87 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_01c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..a5e54260 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_01c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_01c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml new file mode 100644 index 00000000..fb86cab1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02a +task: + - mmlu_alt_ov_02a_stem + - mmlu_alt_ov_02a_other + - mmlu_alt_ov_02a_social_sciences + - mmlu_alt_ov_02a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml new file mode 100644 index 00000000..f9d0cbe2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..48affab8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..22c431f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..9d2c6ef8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..909cacad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..f954bef2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..a7f3b503 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..43f59c40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..6e9e50b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..4fa13ec2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..8dd5e59d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..b5d48026 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..c51793c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..0ae991c7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..9c2453b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..f84ea46b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..70650738 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..58bed110 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..67c2bccb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..606cf7cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..a263a8db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..aaf11f19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..16d3c792 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..2890a9ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..7f9ea1ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..3f9d9bc0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..95849a8a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..4a47d2b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..d4bc7e85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..a68cad24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..cc69fe7e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..f2d93fd2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..09cca52d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..6e13b1cc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..e41cde9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml new file mode 100644 index 00000000..e7329d83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..83e9ba83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..19f7be70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..fd9c96ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml new file mode 100644 index 00000000..f57c2349 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml new file mode 100644 index 00000000..96eae4ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..be684a67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..1987aa8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..e0a677bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..5c0bfcf8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..0a9a9b8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..3f8a63e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..5d0e79d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..c32eb1f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..670486a0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..d2d8c0e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..4539eb83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..9a91416a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..4563db67 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml new file mode 100644 index 00000000..15a1c3cc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..78b0a872 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml new file mode 100644 index 00000000..5f2a94a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..43680f9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml new file mode 100644 index 00000000..e44e24d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02b +task: + - mmlu_alt_ov_02b_stem + - mmlu_alt_ov_02b_other + - mmlu_alt_ov_02b_social_sciences + - mmlu_alt_ov_02b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml new file mode 100644 index 00000000..61d42dc1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..beb0b444 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..c191de4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..dead2a72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..7606d9de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..590cc131 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..70fb2a00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..d2507d3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..aa2d3135 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..b3b3df6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..c0c13556 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..e6091f9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..cce99537 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..dd07cd4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..55fdada8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..23b5eca7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..2968ce23 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..eb5607a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..23d91ba6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..9a466aee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..ca1deecd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..8ac73230 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..16e61fcc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..edc8d86a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..5ee9b96f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b60e7ffd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..232fd0dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..2aff0816 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..279ff330 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..1045aae0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..d1aa0bd8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..bd244d70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..aef60150 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..df776001 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..aff4c80a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml new file mode 100644 index 00000000..86af3288 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..934f6dc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..650cdb8e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..ab6e4b89 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml new file mode 100644 index 00000000..5172803a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml new file mode 100644 index 00000000..a6a29fc1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..8f719609 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..74111ccf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..6ab4c663 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..e3b1e389 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..a315c4a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..613c9029 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..4c6498ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..1d85788b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..1e73bdb4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..f3a83761 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..a64b92e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..11c8dafe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..cbe15e92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml new file mode 100644 index 00000000..d368d695 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..8967a97e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml new file mode 100644 index 00000000..8e257c1e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..6c2529c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml new file mode 100644 index 00000000..b203b6af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_02c +task: + - mmlu_alt_ov_02c_stem + - mmlu_alt_ov_02c_other + - mmlu_alt_ov_02c_social_sciences + - mmlu_alt_ov_02c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml new file mode 100644 index 00000000..15784a85 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_02 +doc_to_choice: !function ../../../styles.choice_02c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..2995969b --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..79df6882 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..f637b8f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..f5719fe5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..69a546df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..a9d4f001 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..66b92c48 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_02c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..fab1db74 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..0448b27e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..a760b223 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..59008d60 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..c0f725bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..3a05daea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..2d60b394 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..4616d29f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..f1441a0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..0593170a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..e59ff221 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..733994e8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..583722f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..9480cef1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..0e34274a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..db513bef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..ba41acc3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..155d67d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_02c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..1dfd8e70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..d5da992a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..7f7f6e47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..e6d7fc2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..58553014 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..74d97255 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..c2a85673 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..e44b95c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..dca8f5d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml new file mode 100644 index 00000000..9b6e4a88 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..6f785e0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..d610efbc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..791ec4f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_02c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml new file mode 100644 index 00000000..5a49c529 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml new file mode 100644 index 00000000..c735a6ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..e8b331eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..9de856e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..d8770f2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..b5287479 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..db79e050 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..939e94b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..04432dae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..5ef6434b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..64f9c536 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..741176f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..bc25a5de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..f4148aea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..ff3d0a17 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml new file mode 100644 index 00000000..3ee886b7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..35747cfc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_02c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml new file mode 100644 index 00000000..cbb163b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_02c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..0f6ebcde --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_02c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_02c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml new file mode 100644 index 00000000..aec0d599 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_03a +task: + - mmlu_alt_ov_03a_stem + - mmlu_alt_ov_03a_other + - mmlu_alt_ov_03a_social_sciences + - mmlu_alt_ov_03a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml new file mode 100644 index 00000000..dff6a860 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..2dc68f41 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..3135aeba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..35db0fd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..ca43852c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..ee884e24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..7ae31d42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..fd30fd08 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_03a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..bd74f254 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..e45be70d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..391a6849 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..308cbfc7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..bd155bea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..0ff45f92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..6a1b2b26 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..c7c53393 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..b3e8eb0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..c7819c38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..8aadcf3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..d4a6284f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..8709ea4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..5323a1b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..8dd43ab5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..42929928 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..67d153e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b0dc557f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_03a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..001a11f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..eab63c5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..d5422c4b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..80df8ad2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..4ef4b260 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..cc86991a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..2ffa6b0d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..4f0c5e19 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..a9cc8675 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml new file mode 100644 index 00000000..d2baeb17 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..17bd9195 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..8c9b4c3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..c9e52901 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml new file mode 100644 index 00000000..a1d161c8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml new file mode 100644 index 00000000..6477216d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..cc1a91c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..ed6b320d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..2aeed4fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..878b7dfe --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..e0c4a2cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..4a8d7020 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..b32bc19d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..0ae97513 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..744ee067 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..cc67308a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..e0e5dad0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..e26d3724 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..6f2d39ad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml new file mode 100644 index 00000000..808bc194 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..5cd2a44d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml new file mode 100644 index 00000000..542cbecd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..9383a90d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml new file mode 100644 index 00000000..97b9deff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_03b +task: + - mmlu_alt_ov_03b_stem + - mmlu_alt_ov_03b_other + - mmlu_alt_ov_03b_social_sciences + - mmlu_alt_ov_03b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml new file mode 100644 index 00000000..9c2f674d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..8125f799 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..49e6b44a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..c08ddb38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..8e0c1a93 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..8a58597a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..7fd86165 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..3bae9bb7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_03b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..cb6c5b6e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..8a8ce24d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..2586b9b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..8cd632d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..85cf6203 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..6d553745 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..ba8a62e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..00974ad8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..42bc502a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..03478823 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..77632fe8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..72c770c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..1a9dfcdf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..e9fe7b8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..6273d5f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..fbcd3c24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..a4cdbf20 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b57f64fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_03b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..6fe0f2fc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..30e29284 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..5e587ce8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..e8da0422 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..a6286b33 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..2e4fa836 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..da65a5be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..79822f51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..3c89a184 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml new file mode 100644 index 00000000..91558fa1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..2122f7e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..08315929 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..8ef6877c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml new file mode 100644 index 00000000..83dc821d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml new file mode 100644 index 00000000..fdb0b828 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..17de8f0f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..6537662e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..42e23d04 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..5f946143 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..b786e7d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..a8d1e4db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..d2710da1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..65c7e021 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..9c44d779 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..3f922754 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..69569a9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..150151b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..a73326a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml new file mode 100644 index 00000000..4f736efa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..277c2060 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml new file mode 100644 index 00000000..3ef400c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..86447db7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml new file mode 100644 index 00000000..279057b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_03c +task: + - mmlu_alt_ov_03c_stem + - mmlu_alt_ov_03c_other + - mmlu_alt_ov_03c_social_sciences + - mmlu_alt_ov_03c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml new file mode 100644 index 00000000..4f8725dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_03 +doc_to_choice: !function ../../../styles.choice_03c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..1efca54f --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..de405dc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..84beb052 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..3c899071 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..c1f239af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..fa70ae44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..f055b583 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_03c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..8ab1f595 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..13f4ff91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..7bf4c54c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..7142e1ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..1f5702d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..edc6abda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..87646535 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..ded23bd8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..0b202d83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..e45b3363 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..a94b03d0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..af162db5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..32fe1ff2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..527ca779 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..2def5dd7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..9c4a8ae1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..0e7c87d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..06f527b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_03c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..598e5050 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..889c0704 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..e00f4b9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..c8ec2348 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..4a31f6b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..59dfeb48 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..a0219ce0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..3a702d72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..eed54461 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml new file mode 100644 index 00000000..a2dd5de4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..21880b7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..2544c245 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..a5292e41 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_03c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml new file mode 100644 index 00000000..d3e62abd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml new file mode 100644 index 00000000..3958944a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..d67df1c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..f084f1fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..9b48f334 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..ab91e4ba --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..a238240d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..ed4fdb44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..83368c8e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..e3187a97 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..16043103 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..7071e39c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..0339394f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..0e1a4f45 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..6141dbdf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml new file mode 100644 index 00000000..5d748f3e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..782cd716 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_03c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml new file mode 100644 index 00000000..d0f91a7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_03c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..1c3446b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_03c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_03c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml new file mode 100644 index 00000000..4c72d1b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04a +task: + - mmlu_alt_ov_04a_stem + - mmlu_alt_ov_04a_other + - mmlu_alt_ov_04a_social_sciences + - mmlu_alt_ov_04a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml new file mode 100644 index 00000000..609cc706 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..2c89e19a --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..4a50e383 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..aec8048b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..2041d641 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..24564acb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..1cd9273a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..9fd97dfe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_04a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..ede1b3b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..eed15e0d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..15ef7355 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..9d04b497 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..c7699e90 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..b44faf2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..9cf241d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..698f8bfe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..be3811c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..e621f77c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..1c6816b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..2951f396 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..d2bbee0a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..2404903f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..81885bb7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..4362b2a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..caaf9bd4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..51de1287 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_04a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..6355e89c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..1fb676d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..b2e3cf52 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..ba37f2fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..82fa0d32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..a99cda3a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..cd6972cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..4525a486 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..9bc378dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml new file mode 100644 index 00000000..b71af43a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..d3e2eafd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..8f662085 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..7960b3fd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml new file mode 100644 index 00000000..4a3eebad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml new file mode 100644 index 00000000..ff474e37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..d56452d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..756bf186 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..4a1d9f64 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..7278f5f5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..9a87a851 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..5a25cd46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..4d84bea4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..03ae20de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..0eeddfe5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..e6f8ee82 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..a5f3538f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..ca501151 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..c4a9af05 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml new file mode 100644 index 00000000..ab0388bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..6279fec2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_04a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml new file mode 100644 index 00000000..aad84103 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..8ab8f6b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml new file mode 100644 index 00000000..1f9887c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04b +task: + - mmlu_alt_ov_04b_stem + - mmlu_alt_ov_04b_other + - mmlu_alt_ov_04b_social_sciences + - mmlu_alt_ov_04b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml new file mode 100644 index 00000000..5d1810d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..42d301b8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..4c25ee11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..577d16bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..cf58d929 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..eb7eb963 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..bd42c5b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..10a9f5e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_04b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..a38fbd44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..44a15df0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..e5e807ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..da7057bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..54ad5d1f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..72c6347d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..607811c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..e7098c13 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..8e9733b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..c255e82f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..8d852646 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..0d60b324 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..268f3fda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..8b0b6795 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..5e336597 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..7639b9a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..97948e98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..7cfa35b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_04b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..a90b6520 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..f772e76f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..ca32bdc8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..43db99f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..509e0cba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..680a0241 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..424bffdb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..847c233e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..ab542ef1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml new file mode 100644 index 00000000..79e2a1e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..edef94a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..25d74687 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..f139710b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml new file mode 100644 index 00000000..94bb6aa3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml new file mode 100644 index 00000000..8fa3bd4b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..3c74cdd6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..c3cd247d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..3637540c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..b97a78e0 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..59584190 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..4080df18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..0101a60b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..a96d871a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..833a4015 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..304a57bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..84f4c03a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..d098cd9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..6b8a5f1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml new file mode 100644 index 00000000..e2eedc10 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..79a219f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_04b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml new file mode 100644 index 00000000..4546ce88 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..f1be68d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml new file mode 100644 index 00000000..81f8329d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_04c +task: + - mmlu_alt_ov_04c_stem + - mmlu_alt_ov_04c_other + - mmlu_alt_ov_04c_social_sciences + - mmlu_alt_ov_04c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml new file mode 100644 index 00000000..88af060b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_04 +doc_to_choice: !function ../../../styles.choice_04c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..d5d39121 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..a9b85b6f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..e10dc1d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..a989ebc7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..55437c1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..75d9dc12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..cb439036 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_04c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..2f5e1816 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..85390337 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..fe8ddd58 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..36f582d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..cf32b53e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..546c74b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..9ddcf20e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..d5431c8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..daeb3efc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..39912a7b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..c9c866d7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..a04054c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..d7b6878f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..62d0a144 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..9a950e97 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..2f1541bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..6cb89ef3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f8973675 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_04c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..e99c141b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..8173caaa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..0060251f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..5ad89e95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..ebe72eac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..9ba0a18a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..e634f4af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..9510ba38 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..287bbbd7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml new file mode 100644 index 00000000..3c4b4e77 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..b3a269b0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..54a2680f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..708c0c80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_04c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml new file mode 100644 index 00000000..7467c45e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml new file mode 100644 index 00000000..3567ae8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..3f9686c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..4529c5ea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..3f6f4da3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..1b5a23b6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..b3697fdb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..fee89c7a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..3b9ebc08 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..36419277 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..a40f78c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..149272c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..f208377a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..ed5b5314 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..53d538b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml new file mode 100644 index 00000000..2ccd329f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..2d29fb76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_04c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml new file mode 100644 index 00000000..ea188a0d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_04c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..4ee86870 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_04c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_04c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml new file mode 100644 index 00000000..4f73aa15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml new file mode 100644 index 00000000..83bce395 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_01 +task: + - mmlu_alt_pv_01_stem + - mmlu_alt_pv_01_other + - mmlu_alt_pv_01_social_sciences + - mmlu_alt_pv_01_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..6ecfa807 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml new file mode 100644 index 00000000..ae8eb7a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml new file mode 100644 index 00000000..688106b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml new file mode 100644 index 00000000..31a0f39a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..b860ba1b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml new file mode 100644 index 00000000..8c5464ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..8938bd03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_01_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..9f0829a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..e0fde29e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml new file mode 100644 index 00000000..98e29dc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": 
"mmlu_alt_pv_01_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml new file mode 100644 index 00000000..a9249498 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml new file mode 100644 index 00000000..fb127bad --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..9d054a75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_conceptual_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml new file mode 100644 index 00000000..aa25237d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..551c1544 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..e8034806 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml new file mode 100644 index 00000000..7517dd5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml new file mode 100644 index 00000000..15f4a14e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..2d514e0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..6d841bda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..9b7266fd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..d19c3874 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_european_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..a6693a49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..6f6644d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..dff29707 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": 
"mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..39ea0984 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..327756b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..56c5999b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + 
\ school physics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..22c5c6ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..4a7e279f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..223c548a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ 
school us history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..ba3d32d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml new file mode 100644 index 00000000..320dcb01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..e21ee56f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" 
+"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml new file mode 100644 index 00000000..d1e4e13a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..3c28b447 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..36d1cb5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml new file mode 100644 index 00000000..a56ea1ba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_01_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml new file mode 100644 index 00000000..fa362939 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml new file mode 100644 index 00000000..635afd3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..ac06e3df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..9dfeadb3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..e74db2cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..0eb99f43 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml new file mode 100644 index 00000000..a097ddbd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml new file mode 100644 index 00000000..f0b463de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml new file mode 100644 index 00000000..588fbacb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..5d58389b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml new file mode 100644 index 00000000..b7681cfd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..0d26d9d3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..7a979cf0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml new file mode 100644 index 00000000..87a8a946 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml new file mode 100644 index 00000000..a89be56e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" 
+"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml new file mode 100644 index 00000000..7e2bc065 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..02016eb6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_01_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml new file mode 100644 index 00000000..d08227a2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_01_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_virology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml new file mode 100644 index 00000000..918db68b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_01/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_01_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_01_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml new file mode 100644 index 00000000..083fb599 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "Q: {{question.strip()}}\nA:" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml new file mode 100644 index 00000000..c0ec579b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_02 +task: + - mmlu_alt_pv_02_stem + - mmlu_alt_pv_02_other + - mmlu_alt_pv_02_social_sciences + - mmlu_alt_pv_02_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml 
new file mode 100644 index 00000000..8bd07995 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml new file mode 100644 index 00000000..cdc0dbce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml new file mode 100644 index 00000000..484cbe37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml new file mode 100644 index 00000000..e626b34a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" 
+"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..547e3067 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml new file mode 100644 index 00000000..2127089a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..75103c15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ 
chemistry.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..49e60548 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..c450b9a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml new file mode 100644 index 00000000..695e45be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_02_other" 
+"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml new file mode 100644 index 00000000..db409508 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml new file mode 100644 index 00000000..3bad6297 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..cfc363ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_conceptual_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml new file mode 100644 index 00000000..94aeee69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..189fdc9d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..9ebbb836 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_elementary_mathematics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml new file mode 100644 index 00000000..7aa10a12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml new file mode 100644 index 00000000..180635a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..50ea4496 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..82279900 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..5d48b507 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..78c6ace6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_european_history" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..bfc505c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..4581c806 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0ac0d5e1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": 
"mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..ddbd5349 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..9dddda2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..3337914e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + 
\ school physics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..b32de65e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..c9716f98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..292b857e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ 
school us history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..2449fc17 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml new file mode 100644 index 00000000..a7c912ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..1cc26aae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" 
+"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml new file mode 100644 index 00000000..55859077 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..5273a53e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..822c67dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml new file mode 100644 index 00000000..0a3bb9d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_02_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml new file mode 100644 index 00000000..e4af9ba0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml new file mode 100644 index 00000000..1a23ba70 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..c4792672 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..f37e75ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..0657ff03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..4774d817 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml new file mode 100644 index 00000000..ccc9c922 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml new file mode 100644 index 00000000..2daae4ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml new file mode 100644 index 00000000..ba217bbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..b9cbc5f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml new file mode 100644 index 00000000..15828b99 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..a6800e9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..3dc609c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml new file mode 100644 index 00000000..3c798918 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml new file mode 100644 index 00000000..1b5086bb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" 
+"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml new file mode 100644 index 00000000..726bf086 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..80962e7e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_02_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml new file mode 100644 index 00000000..66cd00c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_02_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_virology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml new file mode 100644 index 00000000..92dd159a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_02/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_02_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_02_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml new file mode 100644 index 00000000..07f98304 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_default_template_yaml @@ -0,0 +1,13 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "Question: {{question.strip()}}\nAnswer:" +doc_to_choice: choices +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml new file mode 100644 index 00000000..39e434a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_pv_03 +task: + - mmlu_alt_pv_03_stem + - mmlu_alt_pv_03_other + - mmlu_alt_pv_03_social_sciences + - mmlu_alt_pv_03_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..7941d4a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml new file mode 100644 index 00000000..6d04c321 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml new file mode 100644 index 00000000..d31997eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml new file mode 100644 index 00000000..aea729b4 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..949c5fe4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml new file mode 100644 index 00000000..f2a947f2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..29e01ad4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..ae23319a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..b349a40a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml new file mode 100644 index 00000000..ccc5fd2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" 
+"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml new file mode 100644 index 00000000..cdb18383 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml new file mode 100644 index 00000000..b2ef889d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..21517124 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ 
physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml new file mode 100644 index 00000000..c42e5ca8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..b3c61ff8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..b533bbc1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": 
"_default_template_yaml" +"task": "mmlu_alt_pv_03_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml new file mode 100644 index 00000000..02e4314b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml new file mode 100644 index 00000000..508ddfee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..d12e03e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_biology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..a7d6d032 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..d909a678 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..03c4df82 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" 
+"task": "mmlu_alt_pv_03_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..1231d693 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2bb26aff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..62db2eaf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) 
about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..8a690a27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..e924615d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..095d9964 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..a0957767 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..2a7ddd5b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..7c6e9d12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are 
multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..77107f32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml new file mode 100644 index 00000000..9aeaece1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..0dd688bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ 
sexuality.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml new file mode 100644 index 00000000..d284aa6c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..43ecb1c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..89dcf27d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": 
"mmlu_alt_pv_03_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml new file mode 100644 index 00000000..1131165e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_pv_03_stem" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml new file mode 100644 index 00000000..72400f1f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml new file mode 100644 index 00000000..52332eed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..8efa7e9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..39b9ae77 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..eb08c777 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..84cc1692 
--- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml new file mode 100644 index 00000000..c5f4a05f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml new file mode 100644 index 00000000..a7c3408d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml new file mode 100644 index 00000000..20294008 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple 
choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..13f6cefb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml new file mode 100644 index 00000000..461f5ba1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..f0a90066 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + 
\ medicine.\n\n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..f61d3e40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml new file mode 100644 index 00000000..2b2e2a97 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml new file mode 100644 index 00000000..3e8896d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": 
"mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml new file mode 100644 index 00000000..d808d4ec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..f3c68319 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_pv_03_social_sciences" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml new file mode 100644 index 00000000..d710576d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_pv_03_other" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_virology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml new file mode 100644 index 00000000..e8367f96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/prompt_variation/style_03/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_pv_03_humanities" +"include": "_default_template_yaml" +"task": "mmlu_alt_pv_03_world_religions" -- GitLab From ec656bb15e58330f57f8f72ea71e1120d2d7f186 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:23:40 +0000 Subject: [PATCH 33/50] format --- lm_eval/tasks/arithmetic/alternative_worlds/utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py index 6f2e69c0..56ad74ac 100644 --- a/lm_eval/tasks/arithmetic/alternative_worlds/utils.py +++ b/lm_eval/tasks/arithmetic/alternative_worlds/utils.py @@ -2,26 +2,24 @@ import re # Original Prompt # Question: What is (9 + 8) * 2? Answer: -def style_00(docs): + +def style_00(docs): # What is (9 + 8) * 2? return docs["context"] def style_01(docs): - # What is (9 + 8) * 2? return docs["context"].replace("Question: ", "").replace(" Answer:", "") def style_02(docs): - # Q: What is (9 + 8) * 2? A: return docs["context"].replace("Question: ", "Q: ").replace(" Answer:", " A:") def style_03(docs): - # Solve (9 + 8) * 2. return ( docs["context"].replace("Question: What is", "Solve").replace(" Answer:", ".") @@ -29,12 +27,10 @@ def style_03(docs): def style_04(docs): - # (9 + 8) * 2 = return docs["context"].replace("Question: What is ", "").replace(" Answer:", " =") def style_05(docs): - # What is (9 + 8) * 2? 
Answer: return docs["context"].replace("Question: ", "") -- GitLab From 6fdecc6f183b8fe65024ec7fd8ee3d88209c747e Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:27:28 +0000 Subject: [PATCH 34/50] reveresed os.walk --- lm_eval/tasks/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py index 28563de6..a776add0 100644 --- a/lm_eval/tasks/__init__.py +++ b/lm_eval/tasks/__init__.py @@ -131,7 +131,7 @@ def include_task_folder(task_dir: str, register_task: bool = True) -> None: """ Calling this function """ - for root, subdirs, file_list in os.walk(task_dir): + for root, subdirs, file_list in reversed(list(os.walk(task_dir))): # if (subdirs == [] or subdirs == ["__pycache__"]) and (len(file_list) > 0): for f in file_list: if f.endswith(".yaml"): -- GitLab From d5c6f0e5d093e9fc387d3c724cf8a56139fd885d Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:28:19 +0000 Subject: [PATCH 35/50] moved files --- .../output_variation/{ => style_01}/_mmlu_ov_01.yaml | 0 .../output_variation/{ => style_02}/_mmlu_ov_02.yaml | 0 .../output_variation/{ => style_03}/_mmlu_ov_03.yaml | 0 .../output_variation/{ => style_04}/_mmlu_ov_04.yaml | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename lm_eval/tasks/mmlu/alternative_worlds/output_variation/{ => style_01}/_mmlu_ov_01.yaml (100%) rename lm_eval/tasks/mmlu/alternative_worlds/output_variation/{ => style_02}/_mmlu_ov_02.yaml (100%) rename lm_eval/tasks/mmlu/alternative_worlds/output_variation/{ => style_03}/_mmlu_ov_03.yaml (100%) rename lm_eval/tasks/mmlu/alternative_worlds/output_variation/{ => style_04}/_mmlu_ov_04.yaml (100%) diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/_mmlu_ov_01.yaml similarity index 100% rename from lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_01.yaml rename to 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_01/_mmlu_ov_01.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/_mmlu_ov_02.yaml similarity index 100% rename from lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_02.yaml rename to lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_02/_mmlu_ov_02.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/_mmlu_ov_03.yaml similarity index 100% rename from lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_03.yaml rename to lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_03/_mmlu_ov_03.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/_mmlu_ov_04.yaml similarity index 100% rename from lm_eval/tasks/mmlu/alternative_worlds/output_variation/_mmlu_ov_04.yaml rename to lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_04/_mmlu_ov_04.yaml -- GitLab From cd4ef5e8995486290dec4f45345839c289741a18 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:28:46 +0000 Subject: [PATCH 36/50] removed comments --- .../alternative_worlds/mmlu_prompt_variation.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml index 3fd3ea5e..d210cd14 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_prompt_variation.yaml @@ -3,15 +3,3 @@ task: - mmlu_alt_pv_01 - mmlu_alt_pv_02 - mmlu_alt_pv_03 - # - mmlu_alt_pv_01_stem - # - mmlu_alt_pv_01_other - # - mmlu_alt_pv_01_social_sciences - # - mmlu_alt_pv_01_humanities - # - 
mmlu_alt_pv_02_stem - # - mmlu_alt_pv_02_other - # - mmlu_alt_pv_02_social_sciences - # - mmlu_alt_pv_02_humanities - # - mmlu_alt_pv_03_stem - # - mmlu_alt_pv_03_other - # - mmlu_alt_pv_03_social_sciences - # - mmlu_alt_pv_03_humanities -- GitLab From e96c64847d2e3009bca76624ab603362163377d9 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:36:53 +0000 Subject: [PATCH 37/50] allow doc_to_choice to be a single feature from a dataset --- lm_eval/api/task.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 5f8b1476..91fa7ac1 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -936,7 +936,10 @@ class ConfigurableTask(Task): doc_to_choice = self.config.doc_to_choice if type(doc_to_choice) == str: - return ast.literal_eval(utils.apply_template(doc_to_choice, doc)) + if doc_to_choice in self.features: + return doc[doc_to_choice] + else: + return ast.literal_eval(utils.apply_template(doc_to_choice, doc)) elif type(doc_to_choice) == list: return doc_to_choice elif type(doc_to_choice) == dict: -- GitLab From 3c5f347f687d683e7585bab5e9cc33f19d51b81f Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:37:18 +0000 Subject: [PATCH 38/50] fixed list --- .../mmlu_output_variation.yaml | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml index 29cd1544..3564b3a3 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml @@ -1,26 +1,10 @@ group: mmlu_alt_ov task: - - mmlu_alt_ov_01a - - mmlu_alt_ov_01b - - mmlu_alt_ov_01c - - mmlu_alt_ov_02a - - mmlu_alt_ov_02b - - mmlu_alt_ov_02c - - mmlu_alt_ov_03a - - mmlu_alt_ov_03b - - mmlu_alt_ov_03c - - mmlu_alt_ov_04a - - mmlu_alt_ov_04b - - mmlu_alt_ov_04c - - 
mmlu_alt_ov_05a - - mmlu_alt_ov_05b - - mmlu_alt_ov_05c - - mmlu_alt_ov_06a - - mmlu_alt_ov_06b - - mmlu_alt_ov_06c - - mmlu_alt_ov_07a - - mmlu_alt_ov_07b - - mmlu_alt_ov_07c - - mmlu_alt_ov_08a - - mmlu_alt_ov_08b - - mmlu_alt_ov_08c + - mmlu_alt_ov_01 + - mmlu_alt_ov_02 + - mmlu_alt_ov_03 + - mmlu_alt_ov_04 + - mmlu_alt_ov_05 + - mmlu_alt_ov_06 + - mmlu_alt_ov_07 + - mmlu_alt_ov_08 \ No newline at end of file -- GitLab From 5da401b070f3cb92e37c96e720319f87aa3eb83d Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 04:37:40 +0000 Subject: [PATCH 39/50] add styles 5 -8 --- .../output_variation/style_05/_mmlu_ov_05.yaml | 5 +++++ .../output_variation/style_05/a/_mmlu.yaml | 6 ++++++ .../output_variation/style_05/a/_template_yaml | 11 +++++++++++ .../style_05/a/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_astronomy.yaml | 6 ++++++ .../style_05/a/mmlu_business_ethics.yaml | 6 ++++++ .../style_05/a/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_05/a/mmlu_college_biology.yaml | 6 ++++++ .../style_05/a/mmlu_college_chemistry.yaml | 6 ++++++ .../style_05/a/mmlu_college_computer_science.yaml | 6 ++++++ .../style_05/a/mmlu_college_mathematics.yaml | 6 ++++++ .../style_05/a/mmlu_college_medicine.yaml | 6 ++++++ .../style_05/a/mmlu_college_physics.yaml | 6 ++++++ .../style_05/a/mmlu_computer_security.yaml | 6 ++++++ .../style_05/a/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_05/a/mmlu_econometrics.yaml | 6 ++++++ .../style_05/a/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_05/a/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_05/a/mmlu_formal_logic.yaml | 6 ++++++ .../style_05/a/mmlu_global_facts.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_biology.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_european_history.yaml | 6 
++++++ .../style_05/a/mmlu_high_school_geography.yaml | 6 ++++++ .../a/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_physics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_05/a/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_human_aging.yaml | 6 ++++++ .../style_05/a/mmlu_human_sexuality.yaml | 6 ++++++ .../style_05/a/mmlu_international_law.yaml | 6 ++++++ .../style_05/a/mmlu_jurisprudence.yaml | 6 ++++++ .../style_05/a/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_05/a/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_management.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_marketing.yaml | 6 ++++++ .../style_05/a/mmlu_medical_genetics.yaml | 6 ++++++ .../style_05/a/mmlu_miscellaneous.yaml | 6 ++++++ .../style_05/a/mmlu_moral_disputes.yaml | 6 ++++++ .../style_05/a/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_prehistory.yaml | 6 ++++++ .../style_05/a/mmlu_professional_accounting.yaml | 6 ++++++ .../style_05/a/mmlu_professional_law.yaml | 6 ++++++ .../style_05/a/mmlu_professional_medicine.yaml | 6 ++++++ .../style_05/a/mmlu_professional_psychology.yaml | 6 ++++++ .../style_05/a/mmlu_public_relations.yaml | 6 ++++++ .../style_05/a/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_sociology.yaml | 6 ++++++ .../style_05/a/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_05/a/mmlu_virology.yaml | 6 ++++++ 
.../style_05/a/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_05/b/_mmlu.yaml | 6 ++++++ .../output_variation/style_05/b/_template_yaml | 11 +++++++++++ .../style_05/b/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_astronomy.yaml | 6 ++++++ .../style_05/b/mmlu_business_ethics.yaml | 6 ++++++ .../style_05/b/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_05/b/mmlu_college_biology.yaml | 6 ++++++ .../style_05/b/mmlu_college_chemistry.yaml | 6 ++++++ .../style_05/b/mmlu_college_computer_science.yaml | 6 ++++++ .../style_05/b/mmlu_college_mathematics.yaml | 6 ++++++ .../style_05/b/mmlu_college_medicine.yaml | 6 ++++++ .../style_05/b/mmlu_college_physics.yaml | 6 ++++++ .../style_05/b/mmlu_computer_security.yaml | 6 ++++++ .../style_05/b/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_05/b/mmlu_econometrics.yaml | 6 ++++++ .../style_05/b/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_05/b/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_05/b/mmlu_formal_logic.yaml | 6 ++++++ .../style_05/b/mmlu_global_facts.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_biology.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_geography.yaml | 6 ++++++ .../b/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_physics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_05/b/mmlu_high_school_world_history.yaml | 6 
++++++ .../output_variation/style_05/b/mmlu_human_aging.yaml | 6 ++++++ .../style_05/b/mmlu_human_sexuality.yaml | 6 ++++++ .../style_05/b/mmlu_international_law.yaml | 6 ++++++ .../style_05/b/mmlu_jurisprudence.yaml | 6 ++++++ .../style_05/b/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_05/b/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_management.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_marketing.yaml | 6 ++++++ .../style_05/b/mmlu_medical_genetics.yaml | 6 ++++++ .../style_05/b/mmlu_miscellaneous.yaml | 6 ++++++ .../style_05/b/mmlu_moral_disputes.yaml | 6 ++++++ .../style_05/b/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_prehistory.yaml | 6 ++++++ .../style_05/b/mmlu_professional_accounting.yaml | 6 ++++++ .../style_05/b/mmlu_professional_law.yaml | 6 ++++++ .../style_05/b/mmlu_professional_medicine.yaml | 6 ++++++ .../style_05/b/mmlu_professional_psychology.yaml | 6 ++++++ .../style_05/b/mmlu_public_relations.yaml | 6 ++++++ .../style_05/b/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_sociology.yaml | 6 ++++++ .../style_05/b/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_05/b/mmlu_virology.yaml | 6 ++++++ .../style_05/b/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_05/c/_mmlu.yaml | 6 ++++++ .../output_variation/style_05/c/_template_yaml | 11 +++++++++++ .../style_05/c/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_astronomy.yaml | 6 ++++++ .../style_05/c/mmlu_business_ethics.yaml | 6 ++++++ .../style_05/c/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_05/c/mmlu_college_biology.yaml | 6 ++++++ .../style_05/c/mmlu_college_chemistry.yaml | 6 ++++++ .../style_05/c/mmlu_college_computer_science.yaml | 6 ++++++ 
.../style_05/c/mmlu_college_mathematics.yaml | 6 ++++++ .../style_05/c/mmlu_college_medicine.yaml | 6 ++++++ .../style_05/c/mmlu_college_physics.yaml | 6 ++++++ .../style_05/c/mmlu_computer_security.yaml | 6 ++++++ .../style_05/c/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_05/c/mmlu_econometrics.yaml | 6 ++++++ .../style_05/c/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_05/c/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_05/c/mmlu_formal_logic.yaml | 6 ++++++ .../style_05/c/mmlu_global_facts.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_biology.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_geography.yaml | 6 ++++++ .../c/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_physics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_05/c/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_human_aging.yaml | 6 ++++++ .../style_05/c/mmlu_human_sexuality.yaml | 6 ++++++ .../style_05/c/mmlu_international_law.yaml | 6 ++++++ .../style_05/c/mmlu_jurisprudence.yaml | 6 ++++++ .../style_05/c/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_05/c/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_management.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_marketing.yaml | 6 ++++++ .../style_05/c/mmlu_medical_genetics.yaml | 6 ++++++ .../style_05/c/mmlu_miscellaneous.yaml | 6 ++++++ .../style_05/c/mmlu_moral_disputes.yaml | 6 ++++++ 
.../style_05/c/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_prehistory.yaml | 6 ++++++ .../style_05/c/mmlu_professional_accounting.yaml | 6 ++++++ .../style_05/c/mmlu_professional_law.yaml | 6 ++++++ .../style_05/c/mmlu_professional_medicine.yaml | 6 ++++++ .../style_05/c/mmlu_professional_psychology.yaml | 6 ++++++ .../style_05/c/mmlu_public_relations.yaml | 6 ++++++ .../style_05/c/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_sociology.yaml | 6 ++++++ .../style_05/c/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_05/c/mmlu_virology.yaml | 6 ++++++ .../style_05/c/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_06/_mmlu_ov_06.yaml | 5 +++++ .../output_variation/style_06/a/_mmlu.yaml | 6 ++++++ .../output_variation/style_06/a/_template_yaml | 11 +++++++++++ .../style_06/a/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_astronomy.yaml | 6 ++++++ .../style_06/a/mmlu_business_ethics.yaml | 6 ++++++ .../style_06/a/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_06/a/mmlu_college_biology.yaml | 6 ++++++ .../style_06/a/mmlu_college_chemistry.yaml | 6 ++++++ .../style_06/a/mmlu_college_computer_science.yaml | 6 ++++++ .../style_06/a/mmlu_college_mathematics.yaml | 6 ++++++ .../style_06/a/mmlu_college_medicine.yaml | 6 ++++++ .../style_06/a/mmlu_college_physics.yaml | 6 ++++++ .../style_06/a/mmlu_computer_security.yaml | 6 ++++++ .../style_06/a/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_06/a/mmlu_econometrics.yaml | 6 ++++++ .../style_06/a/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_06/a/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_06/a/mmlu_formal_logic.yaml | 6 ++++++ .../style_06/a/mmlu_global_facts.yaml | 6 ++++++ 
.../style_06/a/mmlu_high_school_biology.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_geography.yaml | 6 ++++++ .../a/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_physics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_06/a/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_human_aging.yaml | 6 ++++++ .../style_06/a/mmlu_human_sexuality.yaml | 6 ++++++ .../style_06/a/mmlu_international_law.yaml | 6 ++++++ .../style_06/a/mmlu_jurisprudence.yaml | 6 ++++++ .../style_06/a/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_06/a/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_management.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_marketing.yaml | 6 ++++++ .../style_06/a/mmlu_medical_genetics.yaml | 6 ++++++ .../style_06/a/mmlu_miscellaneous.yaml | 6 ++++++ .../style_06/a/mmlu_moral_disputes.yaml | 6 ++++++ .../style_06/a/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_prehistory.yaml | 6 ++++++ .../style_06/a/mmlu_professional_accounting.yaml | 6 ++++++ .../style_06/a/mmlu_professional_law.yaml | 6 ++++++ .../style_06/a/mmlu_professional_medicine.yaml | 6 ++++++ .../style_06/a/mmlu_professional_psychology.yaml | 6 ++++++ .../style_06/a/mmlu_public_relations.yaml | 6 ++++++ 
.../style_06/a/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_sociology.yaml | 6 ++++++ .../style_06/a/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_06/a/mmlu_virology.yaml | 6 ++++++ .../style_06/a/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_06/b/_mmlu.yaml | 6 ++++++ .../output_variation/style_06/b/_template_yaml | 11 +++++++++++ .../style_06/b/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_astronomy.yaml | 6 ++++++ .../style_06/b/mmlu_business_ethics.yaml | 6 ++++++ .../style_06/b/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_06/b/mmlu_college_biology.yaml | 6 ++++++ .../style_06/b/mmlu_college_chemistry.yaml | 6 ++++++ .../style_06/b/mmlu_college_computer_science.yaml | 6 ++++++ .../style_06/b/mmlu_college_mathematics.yaml | 6 ++++++ .../style_06/b/mmlu_college_medicine.yaml | 6 ++++++ .../style_06/b/mmlu_college_physics.yaml | 6 ++++++ .../style_06/b/mmlu_computer_security.yaml | 6 ++++++ .../style_06/b/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_06/b/mmlu_econometrics.yaml | 6 ++++++ .../style_06/b/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_06/b/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_06/b/mmlu_formal_logic.yaml | 6 ++++++ .../style_06/b/mmlu_global_facts.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_biology.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_geography.yaml | 6 ++++++ .../b/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_physics.yaml | 6 ++++++ 
.../style_06/b/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_06/b/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_human_aging.yaml | 6 ++++++ .../style_06/b/mmlu_human_sexuality.yaml | 6 ++++++ .../style_06/b/mmlu_international_law.yaml | 6 ++++++ .../style_06/b/mmlu_jurisprudence.yaml | 6 ++++++ .../style_06/b/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_06/b/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_management.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_marketing.yaml | 6 ++++++ .../style_06/b/mmlu_medical_genetics.yaml | 6 ++++++ .../style_06/b/mmlu_miscellaneous.yaml | 6 ++++++ .../style_06/b/mmlu_moral_disputes.yaml | 6 ++++++ .../style_06/b/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_prehistory.yaml | 6 ++++++ .../style_06/b/mmlu_professional_accounting.yaml | 6 ++++++ .../style_06/b/mmlu_professional_law.yaml | 6 ++++++ .../style_06/b/mmlu_professional_medicine.yaml | 6 ++++++ .../style_06/b/mmlu_professional_psychology.yaml | 6 ++++++ .../style_06/b/mmlu_public_relations.yaml | 6 ++++++ .../style_06/b/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_sociology.yaml | 6 ++++++ .../style_06/b/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_06/b/mmlu_virology.yaml | 6 ++++++ .../style_06/b/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_06/c/_mmlu.yaml | 6 ++++++ .../output_variation/style_06/c/_template_yaml | 11 +++++++++++ .../style_06/c/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_astronomy.yaml | 6 ++++++ .../style_06/c/mmlu_business_ethics.yaml | 6 
++++++ .../style_06/c/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_06/c/mmlu_college_biology.yaml | 6 ++++++ .../style_06/c/mmlu_college_chemistry.yaml | 6 ++++++ .../style_06/c/mmlu_college_computer_science.yaml | 6 ++++++ .../style_06/c/mmlu_college_mathematics.yaml | 6 ++++++ .../style_06/c/mmlu_college_medicine.yaml | 6 ++++++ .../style_06/c/mmlu_college_physics.yaml | 6 ++++++ .../style_06/c/mmlu_computer_security.yaml | 6 ++++++ .../style_06/c/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_06/c/mmlu_econometrics.yaml | 6 ++++++ .../style_06/c/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_06/c/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_06/c/mmlu_formal_logic.yaml | 6 ++++++ .../style_06/c/mmlu_global_facts.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_biology.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_geography.yaml | 6 ++++++ .../c/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_physics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_06/c/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_human_aging.yaml | 6 ++++++ .../style_06/c/mmlu_human_sexuality.yaml | 6 ++++++ .../style_06/c/mmlu_international_law.yaml | 6 ++++++ .../style_06/c/mmlu_jurisprudence.yaml | 6 ++++++ .../style_06/c/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_06/c/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_management.yaml | 6 
++++++ .../output_variation/style_06/c/mmlu_marketing.yaml | 6 ++++++ .../style_06/c/mmlu_medical_genetics.yaml | 6 ++++++ .../style_06/c/mmlu_miscellaneous.yaml | 6 ++++++ .../style_06/c/mmlu_moral_disputes.yaml | 6 ++++++ .../style_06/c/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_prehistory.yaml | 6 ++++++ .../style_06/c/mmlu_professional_accounting.yaml | 6 ++++++ .../style_06/c/mmlu_professional_law.yaml | 6 ++++++ .../style_06/c/mmlu_professional_medicine.yaml | 6 ++++++ .../style_06/c/mmlu_professional_psychology.yaml | 6 ++++++ .../style_06/c/mmlu_public_relations.yaml | 6 ++++++ .../style_06/c/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_sociology.yaml | 6 ++++++ .../style_06/c/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_06/c/mmlu_virology.yaml | 6 ++++++ .../style_06/c/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_07/_mmlu_ov_07.yaml | 5 +++++ .../output_variation/style_07/a/_mmlu.yaml | 6 ++++++ .../output_variation/style_07/a/_template_yaml | 11 +++++++++++ .../style_07/a/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_astronomy.yaml | 6 ++++++ .../style_07/a/mmlu_business_ethics.yaml | 6 ++++++ .../style_07/a/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_07/a/mmlu_college_biology.yaml | 6 ++++++ .../style_07/a/mmlu_college_chemistry.yaml | 6 ++++++ .../style_07/a/mmlu_college_computer_science.yaml | 6 ++++++ .../style_07/a/mmlu_college_mathematics.yaml | 6 ++++++ .../style_07/a/mmlu_college_medicine.yaml | 6 ++++++ .../style_07/a/mmlu_college_physics.yaml | 6 ++++++ .../style_07/a/mmlu_computer_security.yaml | 6 ++++++ .../style_07/a/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_07/a/mmlu_econometrics.yaml | 6 ++++++ 
.../style_07/a/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_07/a/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_07/a/mmlu_formal_logic.yaml | 6 ++++++ .../style_07/a/mmlu_global_facts.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_biology.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_geography.yaml | 6 ++++++ .../a/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_physics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_07/a/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_human_aging.yaml | 6 ++++++ .../style_07/a/mmlu_human_sexuality.yaml | 6 ++++++ .../style_07/a/mmlu_international_law.yaml | 6 ++++++ .../style_07/a/mmlu_jurisprudence.yaml | 6 ++++++ .../style_07/a/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_07/a/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_management.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_marketing.yaml | 6 ++++++ .../style_07/a/mmlu_medical_genetics.yaml | 6 ++++++ .../style_07/a/mmlu_miscellaneous.yaml | 6 ++++++ .../style_07/a/mmlu_moral_disputes.yaml | 6 ++++++ .../style_07/a/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_prehistory.yaml | 6 ++++++ .../style_07/a/mmlu_professional_accounting.yaml | 6 ++++++ 
.../style_07/a/mmlu_professional_law.yaml | 6 ++++++ .../style_07/a/mmlu_professional_medicine.yaml | 6 ++++++ .../style_07/a/mmlu_professional_psychology.yaml | 6 ++++++ .../style_07/a/mmlu_public_relations.yaml | 6 ++++++ .../style_07/a/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_sociology.yaml | 6 ++++++ .../style_07/a/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_07/a/mmlu_virology.yaml | 6 ++++++ .../style_07/a/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_07/b/_mmlu.yaml | 6 ++++++ .../output_variation/style_07/b/_template_yaml | 11 +++++++++++ .../style_07/b/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_astronomy.yaml | 6 ++++++ .../style_07/b/mmlu_business_ethics.yaml | 6 ++++++ .../style_07/b/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_07/b/mmlu_college_biology.yaml | 6 ++++++ .../style_07/b/mmlu_college_chemistry.yaml | 6 ++++++ .../style_07/b/mmlu_college_computer_science.yaml | 6 ++++++ .../style_07/b/mmlu_college_mathematics.yaml | 6 ++++++ .../style_07/b/mmlu_college_medicine.yaml | 6 ++++++ .../style_07/b/mmlu_college_physics.yaml | 6 ++++++ .../style_07/b/mmlu_computer_security.yaml | 6 ++++++ .../style_07/b/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_07/b/mmlu_econometrics.yaml | 6 ++++++ .../style_07/b/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_07/b/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_07/b/mmlu_formal_logic.yaml | 6 ++++++ .../style_07/b/mmlu_global_facts.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_biology.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_geography.yaml | 6 ++++++ .../b/mmlu_high_school_government_and_politics.yaml | 6 ++++++ 
.../style_07/b/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_physics.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_07/b/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_human_aging.yaml | 6 ++++++ .../style_07/b/mmlu_human_sexuality.yaml | 6 ++++++ .../style_07/b/mmlu_international_law.yaml | 6 ++++++ .../style_07/b/mmlu_jurisprudence.yaml | 6 ++++++ .../style_07/b/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_07/b/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_management.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_marketing.yaml | 6 ++++++ .../style_07/b/mmlu_medical_genetics.yaml | 6 ++++++ .../style_07/b/mmlu_miscellaneous.yaml | 6 ++++++ .../style_07/b/mmlu_moral_disputes.yaml | 6 ++++++ .../style_07/b/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_prehistory.yaml | 6 ++++++ .../style_07/b/mmlu_professional_accounting.yaml | 6 ++++++ .../style_07/b/mmlu_professional_law.yaml | 6 ++++++ .../style_07/b/mmlu_professional_medicine.yaml | 6 ++++++ .../style_07/b/mmlu_professional_psychology.yaml | 6 ++++++ .../style_07/b/mmlu_public_relations.yaml | 6 ++++++ .../style_07/b/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_sociology.yaml | 6 ++++++ .../style_07/b/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_07/b/mmlu_virology.yaml | 6 ++++++ .../style_07/b/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_07/c/_mmlu.yaml | 6 ++++++ 
.../output_variation/style_07/c/_template_yaml | 11 +++++++++++ .../style_07/c/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_astronomy.yaml | 6 ++++++ .../style_07/c/mmlu_business_ethics.yaml | 6 ++++++ .../style_07/c/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_07/c/mmlu_college_biology.yaml | 6 ++++++ .../style_07/c/mmlu_college_chemistry.yaml | 6 ++++++ .../style_07/c/mmlu_college_computer_science.yaml | 6 ++++++ .../style_07/c/mmlu_college_mathematics.yaml | 6 ++++++ .../style_07/c/mmlu_college_medicine.yaml | 6 ++++++ .../style_07/c/mmlu_college_physics.yaml | 6 ++++++ .../style_07/c/mmlu_computer_security.yaml | 6 ++++++ .../style_07/c/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_07/c/mmlu_econometrics.yaml | 6 ++++++ .../style_07/c/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_07/c/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_07/c/mmlu_formal_logic.yaml | 6 ++++++ .../style_07/c/mmlu_global_facts.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_biology.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_geography.yaml | 6 ++++++ .../c/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_physics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_07/c/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_human_aging.yaml | 6 ++++++ 
.../style_07/c/mmlu_human_sexuality.yaml | 6 ++++++ .../style_07/c/mmlu_international_law.yaml | 6 ++++++ .../style_07/c/mmlu_jurisprudence.yaml | 6 ++++++ .../style_07/c/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_07/c/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_management.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_marketing.yaml | 6 ++++++ .../style_07/c/mmlu_medical_genetics.yaml | 6 ++++++ .../style_07/c/mmlu_miscellaneous.yaml | 6 ++++++ .../style_07/c/mmlu_moral_disputes.yaml | 6 ++++++ .../style_07/c/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_prehistory.yaml | 6 ++++++ .../style_07/c/mmlu_professional_accounting.yaml | 6 ++++++ .../style_07/c/mmlu_professional_law.yaml | 6 ++++++ .../style_07/c/mmlu_professional_medicine.yaml | 6 ++++++ .../style_07/c/mmlu_professional_psychology.yaml | 6 ++++++ .../style_07/c/mmlu_public_relations.yaml | 6 ++++++ .../style_07/c/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_sociology.yaml | 6 ++++++ .../style_07/c/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_07/c/mmlu_virology.yaml | 6 ++++++ .../style_07/c/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_08/_mmlu_ov_08.yaml | 5 +++++ .../output_variation/style_08/a/_mmlu.yaml | 6 ++++++ .../output_variation/style_08/a/_template_yaml | 11 +++++++++++ .../style_08/a/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_astronomy.yaml | 6 ++++++ .../style_08/a/mmlu_business_ethics.yaml | 6 ++++++ .../style_08/a/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_08/a/mmlu_college_biology.yaml | 6 ++++++ .../style_08/a/mmlu_college_chemistry.yaml | 6 ++++++ .../style_08/a/mmlu_college_computer_science.yaml | 6 ++++++ 
.../style_08/a/mmlu_college_mathematics.yaml | 6 ++++++ .../style_08/a/mmlu_college_medicine.yaml | 6 ++++++ .../style_08/a/mmlu_college_physics.yaml | 6 ++++++ .../style_08/a/mmlu_computer_security.yaml | 6 ++++++ .../style_08/a/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_08/a/mmlu_econometrics.yaml | 6 ++++++ .../style_08/a/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_08/a/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_08/a/mmlu_formal_logic.yaml | 6 ++++++ .../style_08/a/mmlu_global_facts.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_biology.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_geography.yaml | 6 ++++++ .../a/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_physics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_08/a/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_human_aging.yaml | 6 ++++++ .../style_08/a/mmlu_human_sexuality.yaml | 6 ++++++ .../style_08/a/mmlu_international_law.yaml | 6 ++++++ .../style_08/a/mmlu_jurisprudence.yaml | 6 ++++++ .../style_08/a/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_08/a/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_management.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_marketing.yaml | 6 ++++++ .../style_08/a/mmlu_medical_genetics.yaml | 6 ++++++ .../style_08/a/mmlu_miscellaneous.yaml | 6 ++++++ .../style_08/a/mmlu_moral_disputes.yaml | 6 ++++++ 
.../style_08/a/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_prehistory.yaml | 6 ++++++ .../style_08/a/mmlu_professional_accounting.yaml | 6 ++++++ .../style_08/a/mmlu_professional_law.yaml | 6 ++++++ .../style_08/a/mmlu_professional_medicine.yaml | 6 ++++++ .../style_08/a/mmlu_professional_psychology.yaml | 6 ++++++ .../style_08/a/mmlu_public_relations.yaml | 6 ++++++ .../style_08/a/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_sociology.yaml | 6 ++++++ .../style_08/a/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_08/a/mmlu_virology.yaml | 6 ++++++ .../style_08/a/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_08/b/_mmlu.yaml | 6 ++++++ .../output_variation/style_08/b/_template_yaml | 11 +++++++++++ .../style_08/b/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_astronomy.yaml | 6 ++++++ .../style_08/b/mmlu_business_ethics.yaml | 6 ++++++ .../style_08/b/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_08/b/mmlu_college_biology.yaml | 6 ++++++ .../style_08/b/mmlu_college_chemistry.yaml | 6 ++++++ .../style_08/b/mmlu_college_computer_science.yaml | 6 ++++++ .../style_08/b/mmlu_college_mathematics.yaml | 6 ++++++ .../style_08/b/mmlu_college_medicine.yaml | 6 ++++++ .../style_08/b/mmlu_college_physics.yaml | 6 ++++++ .../style_08/b/mmlu_computer_security.yaml | 6 ++++++ .../style_08/b/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_08/b/mmlu_econometrics.yaml | 6 ++++++ .../style_08/b/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_08/b/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_08/b/mmlu_formal_logic.yaml | 6 ++++++ .../style_08/b/mmlu_global_facts.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_biology.yaml | 6 ++++++ 
.../style_08/b/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_geography.yaml | 6 ++++++ .../b/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_physics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_psychology.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_08/b/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_human_aging.yaml | 6 ++++++ .../style_08/b/mmlu_human_sexuality.yaml | 6 ++++++ .../style_08/b/mmlu_international_law.yaml | 6 ++++++ .../style_08/b/mmlu_jurisprudence.yaml | 6 ++++++ .../style_08/b/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_08/b/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_management.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_marketing.yaml | 6 ++++++ .../style_08/b/mmlu_medical_genetics.yaml | 6 ++++++ .../style_08/b/mmlu_miscellaneous.yaml | 6 ++++++ .../style_08/b/mmlu_moral_disputes.yaml | 6 ++++++ .../style_08/b/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_prehistory.yaml | 6 ++++++ .../style_08/b/mmlu_professional_accounting.yaml | 6 ++++++ .../style_08/b/mmlu_professional_law.yaml | 6 ++++++ .../style_08/b/mmlu_professional_medicine.yaml | 6 ++++++ .../style_08/b/mmlu_professional_psychology.yaml | 6 ++++++ .../style_08/b/mmlu_public_relations.yaml | 6 ++++++ .../style_08/b/mmlu_security_studies.yaml | 6 ++++++ 
.../output_variation/style_08/b/mmlu_sociology.yaml | 6 ++++++ .../style_08/b/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_08/b/mmlu_virology.yaml | 6 ++++++ .../style_08/b/mmlu_world_religions.yaml | 6 ++++++ .../output_variation/style_08/c/_mmlu.yaml | 6 ++++++ .../output_variation/style_08/c/_template_yaml | 11 +++++++++++ .../style_08/c/mmlu_abstract_algebra.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_anatomy.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_astronomy.yaml | 6 ++++++ .../style_08/c/mmlu_business_ethics.yaml | 6 ++++++ .../style_08/c/mmlu_clinical_knowledge.yaml | 6 ++++++ .../style_08/c/mmlu_college_biology.yaml | 6 ++++++ .../style_08/c/mmlu_college_chemistry.yaml | 6 ++++++ .../style_08/c/mmlu_college_computer_science.yaml | 6 ++++++ .../style_08/c/mmlu_college_mathematics.yaml | 6 ++++++ .../style_08/c/mmlu_college_medicine.yaml | 6 ++++++ .../style_08/c/mmlu_college_physics.yaml | 6 ++++++ .../style_08/c/mmlu_computer_security.yaml | 6 ++++++ .../style_08/c/mmlu_conceptual_physics.yaml | 6 ++++++ .../style_08/c/mmlu_econometrics.yaml | 6 ++++++ .../style_08/c/mmlu_electrical_engineering.yaml | 6 ++++++ .../style_08/c/mmlu_elementary_mathematics.yaml | 6 ++++++ .../style_08/c/mmlu_formal_logic.yaml | 6 ++++++ .../style_08/c/mmlu_global_facts.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_biology.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_chemistry.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_computer_science.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_european_history.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_geography.yaml | 6 ++++++ .../c/mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_mathematics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_physics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_psychology.yaml | 6 
++++++ .../style_08/c/mmlu_high_school_statistics.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_us_history.yaml | 6 ++++++ .../style_08/c/mmlu_high_school_world_history.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_human_aging.yaml | 6 ++++++ .../style_08/c/mmlu_human_sexuality.yaml | 6 ++++++ .../style_08/c/mmlu_international_law.yaml | 6 ++++++ .../style_08/c/mmlu_jurisprudence.yaml | 6 ++++++ .../style_08/c/mmlu_logical_fallacies.yaml | 6 ++++++ .../style_08/c/mmlu_machine_learning.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_management.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_marketing.yaml | 6 ++++++ .../style_08/c/mmlu_medical_genetics.yaml | 6 ++++++ .../style_08/c/mmlu_miscellaneous.yaml | 6 ++++++ .../style_08/c/mmlu_moral_disputes.yaml | 6 ++++++ .../style_08/c/mmlu_moral_scenarios.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_nutrition.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_philosophy.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_prehistory.yaml | 6 ++++++ .../style_08/c/mmlu_professional_accounting.yaml | 6 ++++++ .../style_08/c/mmlu_professional_law.yaml | 6 ++++++ .../style_08/c/mmlu_professional_medicine.yaml | 6 ++++++ .../style_08/c/mmlu_professional_psychology.yaml | 6 ++++++ .../style_08/c/mmlu_public_relations.yaml | 6 ++++++ .../style_08/c/mmlu_security_studies.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_sociology.yaml | 6 ++++++ .../style_08/c/mmlu_us_foreign_policy.yaml | 6 ++++++ .../output_variation/style_08/c/mmlu_virology.yaml | 6 ++++++ .../style_08/c/mmlu_world_religions.yaml | 6 ++++++ 712 files changed, 4328 insertions(+) create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml 
create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml create mode 
100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml new file mode 100644 index 00000000..da5f4d3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/_mmlu_ov_05.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_05 +task: + - mmlu_alt_ov_05a + - mmlu_alt_ov_05b + - mmlu_alt_ov_05c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml new file mode 100644 index 00000000..f70d92f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05a +task: + - mmlu_alt_ov_05a_stem + - mmlu_alt_ov_05a_other + - mmlu_alt_ov_05a_social_sciences + - mmlu_alt_ov_05a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml new file mode 100644 index 00000000..a9cef98f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/_template_yaml @@ 
-0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..5bbb4d49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..cb410f59 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..3d29c90d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about 
astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..a6f32e9a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..aac82a50 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..7b2e0958 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..1695527b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..f0f45f3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..b65992b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_mathematics" diff 
--git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..7167bb76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..a5833f09 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..c656c893 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..a765f164 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..022c1e46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..417cb37d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..89ae4af9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..aa186cd6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..9c3505e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml new file mode 100644 
index 00000000..61452db3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..707280da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..94e0a579 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..9b199ea5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..9d7bce73 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..c629273e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..964ac9d3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..3304eec0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..09b5d310 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" 
+"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..a26783e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..5b495c0b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..a27e82e2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" 
+"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..f5eb66ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..476aab63 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..a0dc09bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": 
"mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..d2e9162d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml new file mode 100644 index 00000000..69e41fea --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..7b98e5dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..2ef16cb9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..49f2bda5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml new file mode 100644 index 00000000..03412e62 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml new file mode 100644 index 00000000..7386ab1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..506ff69b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..42466c93 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..e544a6e8 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..31d66277 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..4ef6da02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..44fb26b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are 
multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..a2eb9697 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..bd268b2b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..3137eee9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": 
"mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..3533fde1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..e5ac7be3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..548bf1ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": 
"mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..07b08985 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml new file mode 100644 index 00000000..d70a37fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..8c3161cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_05a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml new file mode 100644 index 00000000..1a037bcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..bcaad0cb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml new file mode 100644 index 00000000..ece20a94 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05b +task: + - mmlu_alt_ov_05b_stem + - mmlu_alt_ov_05b_other + - mmlu_alt_ov_05b_social_sciences + - mmlu_alt_ov_05b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml new file mode 100644 index 00000000..b14c6288 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/_template_yaml 
@@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..10731bda --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..a5bdcddf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..41d9a90d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about 
astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..85e4117e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..6c905d18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..72a36377 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..f29f8d0e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..916a25a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..f9d1722c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_mathematics" diff 
--git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..e95cfb00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..9b519110 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..77334a82 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..77818214 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..1aeb60f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..58ed423b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..548e0413 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..f07f5de8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..ec77b6e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml new file mode 100644 
index 00000000..85c228e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..0e7f9ef6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..07d40a6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..74051cc5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..dfdda226 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..30ee3766 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..3ac09e8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..06ae5b54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..1565ba66 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" 
+"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..4a07de6f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..08cfc8bf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..aaa099bc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" 
+"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..29152b5d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..54647dd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..976621d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": 
"mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..1a4668c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml new file mode 100644 index 00000000..0b2626c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..d818f149 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..5eff9592 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..1bbdef46 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml new file mode 100644 index 00000000..ff536418 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml new file mode 100644 index 00000000..cd07a402 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..ec286d16 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..2d1bd3ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..c1f2fc79 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..4266c2a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..286e6c7b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..84183735 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are 
multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..2b1b746f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..d61861e9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..8c056a95 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": 
"mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..af36b4dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..8cdec089 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..30882bb2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": 
"mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..92b73a2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml new file mode 100644 index 00000000..727ae1a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..4685b4a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_05b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml new file mode 100644 index 00000000..8c0e69da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..d85a0006 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml new file mode 100644 index 00000000..88041f27 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_05c +task: + - mmlu_alt_ov_05c_stem + - mmlu_alt_ov_05c_other + - mmlu_alt_ov_05c_social_sciences + - mmlu_alt_ov_05c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml new file mode 100644 index 00000000..82259f03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/_template_yaml 
@@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_05 +doc_to_choice: !function ../../../styles.choice_05c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..4d6b10f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..5a04c43d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..f925929f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about 
astronomy.\n\ + \n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..1bff1ab0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..abeee50a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..d85f4238 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..fd2f672f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..02d07d5f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..0e18b953 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_mathematics" diff 
--git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..d68d7a3d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..f200a990 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..aa926444 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..7cb1151f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..6253cb57 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..f75f1037 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..f19f9904 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..a8b8c1d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..85993966 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml new file mode 100644 
index 00000000..b3ba98ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..7f25e96b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..a715a00e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..6e9921ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..3523e372 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..62010d86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_05c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..1b1c566a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..db8370a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..9b2c003c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" 
+"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..697d7af6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..e758a3a9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..93b2f70d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" 
+"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..a30146a0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..b3868797 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..0b3a0478 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": 
"mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..d425bcc2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml new file mode 100644 index 00000000..13b5a716 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..8bbf9823 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_jurisprudence" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..315af4b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..ab99a814 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_05c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml new file mode 100644 index 00000000..16715d6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml new file mode 100644 index 00000000..c31dba8a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..bc136da9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..3ad9a519 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..ef20ece5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..83d2ebbe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..2cfb0845 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..9e015ff1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are 
multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..9656cfe5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..4ecbe7cf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..355bbdb7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": 
"mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..c20f3c71 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..54cdb1ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..8a8123c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": 
"mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..8ed204c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml new file mode 100644 index 00000000..24f6ac68 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..027f9842 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_05c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_us_foreign_policy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml new file mode 100644 index 00000000..d33a4fef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_05c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..14d1d836 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_05/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_05c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_05c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml new file mode 100644 index 00000000..c5e53e73 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/_mmlu_ov_06.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_06 +task: + - mmlu_alt_ov_06a + - mmlu_alt_ov_06b + - mmlu_alt_ov_06c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml new file mode 100644 index 00000000..718f8375 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06a +task: + - 
mmlu_alt_ov_06a_stem + - mmlu_alt_ov_06a_other + - mmlu_alt_ov_06a_social_sciences + - mmlu_alt_ov_06a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml new file mode 100644 index 00000000..5269f145 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..4e50dad1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..30b360b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_anatomy" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..a766a6e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..6f8f47b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..1900d331 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..de67fe09 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..9bc65269 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..94ad7a72 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..f536b141 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..5dd00361 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..b27f03ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml new file 
mode 100644 index 00000000..03ac40dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..b57076e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..479795ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..3659eea6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..551cb12e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..d796f826 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..d86149e3 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..67cc26ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..9a4e2e0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..8735604d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..4d6d7008 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..af9df9f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..7ef68e44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f2348869 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..1d67a878 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_mathematics" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..4d571b4c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..27c50c84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..e7062280 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..fee1a14b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..6e77d57d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..938a13aa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_06a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..1e8b1113 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..80840d8d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml new file mode 100644 index 00000000..a4efe485 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_international_law" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..31195387 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..724ac307 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..dc2b7f06 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml new file mode 100644 index 00000000..c5c1cb69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml new file mode 100644 index 00000000..635b5dbc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..0100fec1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..05172964 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..6f53f46b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..c551b61a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..63b9fbf5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" 
+"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..942ac9e6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..ca6b4e37 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..9fc815a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_06a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..ed08dc12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..8542b318 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..85b49a34 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..1cf7dd2a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..7fe72f47 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml new file mode 100644 index 00000000..0c034af4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..3f915db0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_06a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml new file mode 100644 index 00000000..db1a318b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..b39f57a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml new file mode 100644 index 00000000..76001890 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06b +task: + - mmlu_alt_ov_06b_stem + - mmlu_alt_ov_06b_other + - mmlu_alt_ov_06b_social_sciences + - mmlu_alt_ov_06b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml new file mode 100644 index 00000000..fc649845 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..6179fecd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..8dc6e13e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_anatomy.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..e3cb4008 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..73bca569 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..004d1f96 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": 
"mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..f1f1b9d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..d2cb94a4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..dafba5c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..17fea247 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..b654b53d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..e3e20c54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_college_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..b59eaf74 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..17a7f402 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..67fd25f1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..09682ca0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..332a7695 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..9812ff4e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..af72cb2c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..9ce58fa3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..15f2a06c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..e2d857d4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..51491ad2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..b3bdd3a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..84250124 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..7b149fd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..73ded429 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" 
+"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..e26eb966 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..12f27df9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..9f8c4f3c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
psychology.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..4b3547ff --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..613b44a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..ed0897c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) 
about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..dc4a6b07 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..d8187a12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml new file mode 100644 index 00000000..933f50b8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..ae748888 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..51de0bab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..cbc36e6d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml new file mode 100644 index 00000000..0faec6dd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml new file mode 100644 index 00000000..c690b7c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..169412c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml new file 
mode 100644 index 00000000..0fda3a54 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..5289c359 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..db749c8c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..c33d1bdc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_nutrition.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..247eab86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..eb87bb71 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..e65e27de --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" 
+"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..f8566534 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..c9ab6259 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..08dccb1c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": 
"mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..6ac634d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..488ebfcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml new file mode 100644 index 00000000..f36a6135 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..4c4a543a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_06b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml new file mode 100644 index 00000000..a11b29e3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..497b55b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml new file mode 100644 index 00000000..1397600a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_06c +task: + - mmlu_alt_ov_06c_stem + - mmlu_alt_ov_06c_other + - mmlu_alt_ov_06c_social_sciences + - mmlu_alt_ov_06c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml new file mode 100644 index 00000000..414b7b25 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_06 +doc_to_choice: !function ../../../styles.choice_06c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..69c61fe4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..42b30178 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_anatomy.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..7e984f02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..a0ba2227 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..21789723 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": 
"mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..c57937fd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..ec529c9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..88c3565c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..56395088 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..c1584be7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..07e4753c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_college_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..2f48cd9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..f66d241b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..38b1149f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..4a486072 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..f2db4ce6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..9ebec057 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..9676d885 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..30fe645b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..86251d8b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..ab05057b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..f2c07f9b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..0bc8c6a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_geography" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..47308b25 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b1687478 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..b950a7a0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" 
+"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..38eeef56 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..15da1569 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..975d34ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
psychology.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..211ff0d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..7ddb5bcf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..8900bdec --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) 
about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..f8c03c6a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..809864af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml new file mode 100644 index 00000000..5fdd16f9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_06c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..70ad5e92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..65822b08 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..84ca2350 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_06c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_machine_learning" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml new file mode 100644 index 00000000..4f442a5a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml new file mode 100644 index 00000000..9fd75423 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..2b1ae607 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml new file 
mode 100644 index 00000000..dc5ea5d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..24e16395 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..c267865e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..ee5bc7da --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_nutrition.yaml @@ 
-0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..ac32db39 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..d8ed3548 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..ce412083 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" 
+"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..ca648a03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..57a16e01 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..728c3325 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": 
"mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..3a0c30d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..f396a27f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml new file mode 100644 index 00000000..ab945977 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_sociology" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..118e3334 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_06c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml new file mode 100644 index 00000000..ee29cc4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_06c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..c48c15b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_06/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_06c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_06c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml new file mode 100644 index 00000000..84452734 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/_mmlu_ov_07.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_07 +task: + - mmlu_alt_ov_07a + - mmlu_alt_ov_07b + - mmlu_alt_ov_07c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml new file mode 100644 index 00000000..e7885ae7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07a +task: + - mmlu_alt_ov_07a_stem + - mmlu_alt_ov_07a_other + - mmlu_alt_ov_07a_social_sciences + - mmlu_alt_ov_07a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml new file mode 100644 index 00000000..a7932f22 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07a +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..3753f10f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": 
"mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..a8ce274c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..daa8c358 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..30f52843 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..c5bce599 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..fd25e096 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..c7fec3a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..e3c5db69 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..7a30111e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..2148d7f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..8c34ca5f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..0aeba991 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..bc922c0c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml new file mode 
100644 index 00000000..b422078c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..5dccde22 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..b28774fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..7079d212 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..20adac4b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..f4ce4a9f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..feeeb165 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ 
+"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..4282ecf1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..6a66e634 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..43b64f90 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..f2c019dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..89d58a24 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..49cb05ae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..bee68ebf --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..70df43b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_physics" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..919ea53b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..a76715c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..28f26f92 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_us_history" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..ca94340a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..f97857ef --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..389af64c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_human_sexuality" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml new file mode 100644 index 00000000..0cea2709 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..e274b3b3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..a648526f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..27f5bce7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml new file mode 100644 index 00000000..3dfe35e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml new file mode 100644 index 00000000..c9bca369 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..72f76523 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..9db2879e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..87022179 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..9adcb5ed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..dc1b458e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..f6732247 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml new file mode 100644 index 00000000..c0b82dd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_07a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..86442333 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..22923265 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..b82c6937 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_medicine" diff 
--git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..7099ca98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..62cae6ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..9934c868 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_security_studies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml new file mode 100644 index 00000000..fe3c7c4f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..34c3a507 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_07a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml new file mode 100644 index 00000000..fcca39be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..1285fe75 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml new file mode 100644 index 00000000..5a289391 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07b +task: + - mmlu_alt_ov_07b_stem + - mmlu_alt_ov_07b_other + - mmlu_alt_ov_07b_social_sciences + - mmlu_alt_ov_07b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml new file mode 100644 index 00000000..344e8b49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..547cec13 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..449d874d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..e1db708b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml new file mode 100644 index 00000000..d3625651 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..3efeccc0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..d363aa61 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..4531065c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_07b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..61f6f3fe --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..272afa30 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..7a57600b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..1b1a4fcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..a5c573a7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..5a26c2be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..232a53b0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..604ff995 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..1fdb87d9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..67954527 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..2a420c03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..168b8ee5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..bbb34bc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..5b34eb02 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..5000f182 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..220495c3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..d8591ffa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f7f126f9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_07b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..f7ca038f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..5b4748ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..a7148254 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..23f74d98 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..5598a140 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..75d0cdba --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..8b36f15e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..e8657799 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..eb8a55f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml new file mode 100644 index 00000000..7c141de2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..653a8908 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..585affca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..309b04f6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml new file mode 100644 index 00000000..5c931d2c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml new file mode 100644 index 00000000..66c302b4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..98f1baa7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..8a0e7f84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..2f0acfb7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..17913ee9 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..697b6654 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..0fdccc40 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml new file mode 100644 index 00000000..9d1d6a53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..1499c6c5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..a90140dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..ad89f65a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..0fe83f29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..a92339b5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..f03ac543 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml new file mode 100644 index 00000000..20c98b44 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..4e9f9a59 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_07b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml new file mode 100644 index 00000000..4e238d2c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..5698c38f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml new file mode 100644 index 00000000..df79e5c8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_07c +task: + - mmlu_alt_ov_07c_stem + - mmlu_alt_ov_07c_other + - mmlu_alt_ov_07c_social_sciences + - mmlu_alt_ov_07c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml new file mode 100644 index 00000000..e92a33eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_07 +doc_to_choice: !function ../../../styles.choice_07c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..686388ce --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..c9313eaa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..47b67045 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml new file mode 100644 index 00000000..d3097b9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with 
answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..743516b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..d79b192e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..6c082e4d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" 
+"task": "mmlu_alt_ov_07c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..a8566fed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..b765635f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..90eb026a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_medicine" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..311a6073 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..f63dccbd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..ef2a5eac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..2c317710 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..ce3b5fed --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..86dcd1ce --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..ebb40523 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..e9cfc2ee --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..c73d11f4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 
00000000..b5063b3f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..b3f3f7b6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..6a3647fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..fc022f25 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..9a205499 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..84125faa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": 
"mmlu_alt_ov_07c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..147f21fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..3d58ea11 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..59ee2a83 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": 
"_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..aa344a82 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..689675fb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..142e6294 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..d2113b49 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..22fdd806 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..93d7fb13 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_human_sexuality" 
diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml new file mode 100644 index 00000000..acf5f6f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..3f77c137 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..139b184d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_logical_fallacies" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..b8559c91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_07c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml new file mode 100644 index 00000000..532f6884 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml new file mode 100644 index 00000000..5ce07359 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml new 
file mode 100644 index 00000000..fca68ad1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..ca70e3be --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..e79edbdd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_moral_disputes" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..bf0d4b52 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..46c5c29b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..12f53153 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml new file mode 100644 index 00000000..9f6002af --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions 
(with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..6c2f6286 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..709ca844 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..ea310279 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" 
+"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..3cbb0207 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..87c9d318 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..757e716f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml new file mode 100644 index 00000000..2a1244d5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..00effdcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_07c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml new file mode 100644 index 00000000..238eb61d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_07c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..e1df0e03 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_07/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_07c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_07c_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml new file mode 100644 index 00000000..378988db --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/_mmlu_ov_08.yaml @@ -0,0 +1,5 @@ +group: mmlu_alt_ov_08 +task: + - mmlu_alt_ov_08a + - mmlu_alt_ov_08b + - mmlu_alt_ov_08c diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml new file mode 100644 index 00000000..c62eaec2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08a +task: + - mmlu_alt_ov_08a_stem + - mmlu_alt_ov_08a_other + - mmlu_alt_ov_08a_social_sciences + - mmlu_alt_ov_08a_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml new file mode 100644 index 00000000..544c5697 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08a +doc_to_target: 
answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..c9d2d056 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml new file mode 100644 index 00000000..7981115c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml new file mode 100644 index 00000000..690b3e18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml new file mode 100644 index 00000000..80304895 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..fbd4943f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml new file mode 100644 index 00000000..9779430a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml new file mode 
100644 index 00000000..a87d02a6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..c65f4558 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..298c9f74 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml new file mode 100644 index 00000000..4efc1b50 --- /dev/null 
+++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml new file mode 100644 index 00000000..13dda481 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml new file mode 100644 index 00000000..deab7c15 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..04abf253 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_conceptual_physics.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml new file mode 100644 index 00000000..3ea7b4eb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..8f0cc66b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..b9f10a84 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": 
"The following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml new file mode 100644 index 00000000..ba76ba4b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml new file mode 100644 index 00000000..15e2cef9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..968d1f62 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08a_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..cdab33e4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..ac659b8f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..c89afe32 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
european history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..97443442 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..37d4c369 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fe3c4dae --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..5de10c18 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..5eba0428 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..a61f7c97 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..88bf97c0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..1b0cb33c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..277c86b7 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..d223949a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml new file mode 100644 index 00000000..2cfbfef6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..0867836d --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml new file mode 100644 index 00000000..31362f29 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..fcb7b715 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..6e5e242c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_logical_fallacies.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml new file mode 100644 index 00000000..67da0afd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08a_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml new file mode 100644 index 00000000..fc962a53 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml new file mode 100644 index 00000000..bfff977d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": 
"mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..5d68da86 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..c131112d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..d7daf045 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_moral_disputes" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..848b4cb1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml new file mode 100644 index 00000000..5f3dd18d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml new file mode 100644 index 00000000..099f15f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml new file 
mode 100644 index 00000000..02bbfa61 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..b39e93c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml new file mode 100644 index 00000000..7b39ebf9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..4c70ad68 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..fb06ce62 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml new file mode 100644 index 00000000..3fe438c2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml new file mode 100644 index 00000000..0794c491 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml new file mode 100644 index 00000000..1f9fc02a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..77bbe90a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_08a_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml new file mode 100644 index 00000000..da2aa761 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" 
+"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08a_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml new file mode 100644 index 00000000..1d636c34 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/a/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08a_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08a_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml new file mode 100644 index 00000000..f872f7cd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08b +task: + - mmlu_alt_ov_08b_stem + - mmlu_alt_ov_08b_other + - mmlu_alt_ov_08b_social_sciences + - mmlu_alt_ov_08b_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml new file mode 100644 index 00000000..fd9d94df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08b +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..a0fbe9c6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml new file mode 100644 index 00000000..58bfd0b1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml new file mode 100644 index 00000000..9eca5e1f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml new file mode 100644 
index 00000000..36995b7d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..56123d3b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml new file mode 100644 index 00000000..13e57ee3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..9734b443 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..fc442741 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..2d89509d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml new file mode 100644 index 00000000..19dbfc12 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml new file mode 100644 index 00000000..6d738349 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml new file mode 100644 index 00000000..c02dbf14 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..185ee5bd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 
@@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml new file mode 100644 index 00000000..ab318dac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..1efd8a5e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..f36aeec4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The 
following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml new file mode 100644 index 00000000..d5556b88 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml new file mode 100644 index 00000000..75cd9512 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..1474b407 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08b_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..060a490e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..355109d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..360c30d8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
european history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..693f58ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..999ff848 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..18d61f39 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..f3f23469 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..c4bc1bbb --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..0659a800 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..803e8b12 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..5429f118 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..495f7b79 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..3dab44fa --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml new file mode 100644 index 00000000..d5b9aee9 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..213f13df --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml new file mode 100644 index 00000000..2cfe6b5c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..191d570b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..331e54ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_logical_fallacies.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml new file mode 100644 index 00000000..9232faca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08b_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml new file mode 100644 index 00000000..89cc432b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml new file mode 100644 index 00000000..e06b0100 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": 
"mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..f06299df --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..10456aa1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..5057f1e0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_moral_disputes" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..78ed268a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml new file mode 100644 index 00000000..89bd98f0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml new file mode 100644 index 00000000..d8566dcd --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml new file 
mode 100644 index 00000000..f34e833c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..fabe6f80 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml new file mode 100644 index 00000000..dace16a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..8b49a0b5 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..9c785893 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml new file mode 100644 index 00000000..8401a770 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml new file mode 100644 index 00000000..bc4cc164 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml new file mode 100644 index 00000000..b7124605 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..11fcb95f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_08b_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml new file mode 100644 index 00000000..de8b1fd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" 
+"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08b_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml new file mode 100644 index 00000000..b4d5a5d6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/b/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08b_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08b_world_religions" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml new file mode 100644 index 00000000..cd6e248b --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_alt_ov_08c +task: + - mmlu_alt_ov_08c_stem + - mmlu_alt_ov_08c_other + - mmlu_alt_ov_08c_social_sciences + - mmlu_alt_ov_08c_humanities diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml new file mode 100644 index 00000000..fccff023 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train +test_split: test +fewshot_split: dev +output_type: multiple_choice +doc_to_text: !function ../../../styles.template_08 +doc_to_choice: !function ../../../styles.choice_08c +doc_to_target: answer +metric_list: + - metric: acc + - metric: acc_norm + - metric: brier_score diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml new file mode 100644 index 00000000..28cad616 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are multiple choice questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml new file mode 100644 index 00000000..118593a2 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are multiple choice questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_anatomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml new file mode 100644 index 00000000..11dedec1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are multiple choice questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_astronomy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml new file mode 100644 
index 00000000..49c7bf7f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are multiple choice questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_business_ethics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml new file mode 100644 index 00000000..8722c2f8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are multiple choice questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml new file mode 100644 index 00000000..fc4ccd5e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are multiple choice questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml new file mode 100644 index 00000000..f4690aba --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are multiple choice questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml new file mode 100644 index 00000000..03d4edfc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are multiple choice questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml new file mode 100644 index 00000000..268fd6dc --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are multiple choice questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml new file mode 100644 index 00000000..eb5cd9da --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are multiple choice questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml new file mode 100644 index 00000000..1da6f78a --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are multiple choice questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_college_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml new file mode 100644 index 00000000..cf299685 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are multiple choice questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_computer_security" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml new file mode 100644 index 00000000..e0134277 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 
@@ +"dataset_name": "conceptual_physics" +"description": "The following are multiple choice questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml new file mode 100644 index 00000000..184e057e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are multiple choice questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_econometrics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml new file mode 100644 index 00000000..6a539547 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are multiple choice questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml new file mode 100644 index 00000000..65e9f81e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The 
following are multiple choice questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml new file mode 100644 index 00000000..21ac6a0d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are multiple choice questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_formal_logic" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml new file mode 100644 index 00000000..c0eb40ac --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are multiple choice questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_global_facts" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml new file mode 100644 index 00000000..37e54dd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_alt_ov_08c_stem" 
+"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_biology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml new file mode 100644 index 00000000..de1c42ab --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are multiple choice questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml new file mode 100644 index 00000000..b4aa54c4 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are multiple choice questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml new file mode 100644 index 00000000..8bc00484 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school 
european history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml new file mode 100644 index 00000000..bac6e812 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are multiple choice questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_geography" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 00000000..1b9de7a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 00000000..c7036e9c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": 
"high_school_macroeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml new file mode 100644 index 00000000..6952d358 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml new file mode 100644 index 00000000..13c836ca --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml new file mode 100644 index 00000000..89bdcf54 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_physics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml new file mode 100644 index 00000000..58b2b077 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are multiple choice questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml new file mode 100644 index 00000000..811af4a1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are multiple choice questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml new file mode 100644 index 00000000..6fdc9483 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml new file mode 100644 index 00000000..f39b8330 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are multiple choice questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml new file mode 100644 index 00000000..5e452b51 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are multiple choice questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_human_aging" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml new file mode 100644 index 00000000..05804591 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are multiple choice questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_human_sexuality" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml new file mode 100644 index 00000000..9798c465 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are multiple choice questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_international_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml new file mode 100644 index 00000000..5bc1236c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_jurisprudence" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml new file mode 100644 index 00000000..659afcc6 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_logical_fallacies.yaml @@ -0,0 
+1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are multiple choice questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml new file mode 100644 index 00000000..34e70b76 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are multiple choice questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_alt_ov_08c_stem" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_machine_learning" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml new file mode 100644 index 00000000..734aa96c --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are multiple choice questions (with answers) about management.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_management" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml new file mode 100644 index 00000000..c36b4750 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are multiple choice questions (with answers) about marketing.\n\ + \n" +"group": 
"mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_marketing" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml new file mode 100644 index 00000000..53d0fab7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are multiple choice questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_medical_genetics" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml new file mode 100644 index 00000000..8d994dd5 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_miscellaneous" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml new file mode 100644 index 00000000..ac8daaf8 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are multiple choice questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_moral_disputes" diff --git 
a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml new file mode 100644 index 00000000..e2e5fe42 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are multiple choice questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml new file mode 100644 index 00000000..4ad5a3f7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are multiple choice questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_nutrition" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml new file mode 100644 index 00000000..16994446 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are multiple choice questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_philosophy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml new file 
mode 100644 index 00000000..23e327d1 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are multiple choice questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_prehistory" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml new file mode 100644 index 00000000..9515d77d --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are multiple choice questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_accounting" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml new file mode 100644 index 00000000..d6187d91 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are multiple choice questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_law" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml new file mode 100644 index 00000000..00a2136c --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are multiple choice questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_medicine" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml new file mode 100644 index 00000000..898eb37e --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are multiple choice questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_professional_psychology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml new file mode 100644 index 00000000..f00eb973 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are multiple choice questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_public_relations" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml new file mode 100644 index 00000000..0ec582d6 --- /dev/null +++ 
b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are multiple choice questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_security_studies" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml new file mode 100644 index 00000000..a06d4e2f --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are multiple choice questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_sociology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml new file mode 100644 index 00000000..b48618f3 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are multiple choice questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_alt_ov_08c_social_sciences" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml new file mode 100644 index 00000000..8b0cfce7 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" 
+"description": "The following are multiple choice questions (with answers) about virology.\n\ + \n" +"group": "mmlu_alt_ov_08c_other" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_virology" diff --git a/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml new file mode 100644 index 00000000..8ef51c00 --- /dev/null +++ b/lm_eval/tasks/mmlu/alternative_worlds/output_variation/style_08/c/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are multiple choice questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_alt_ov_08c_humanities" +"include": "_template_yaml" +"task": "mmlu_alt_ov_08c_world_religions" -- GitLab From 4bff76d5b4ef62ebfca1ede2671ffcc234b5607c Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 11:27:41 +0000 Subject: [PATCH 40/50] fixed stderr for metrics like brier_score --- lm_eval/evaluator.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index b9af4aa7..ad124f13 100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -487,16 +487,24 @@ def evaluate( # For unweighted averaging, use: # current_size = 1 + # TODO: Tasks like brier score for individual + # tasks have no stderr since the score is + # itself an aggregation. 
But it's possible to + # calculate the stderr over groups + all_stderr = [] for metric in [ key for key in metrics.keys() if "_stderr" not in key ]: stderr = "_stderr,".join(metric.split(",")) stderr_score = results[task][stderr] - var_score = stderr_score**2 - metric_score = results[task][metric] + if stderr_score == "N/A": + var_score = "N/A" + else: + var_score = stderr_score**2 + all_stderr.append(stderr) - all_stderr.append(stderr) + metric_score = results[task][metric] if metric in results[group]: results[group][metric] = ( @@ -504,17 +512,20 @@ def evaluate( + metric_score * current_size ) / (total_size + current_size) # $$s_z^2 = \frac{(n-1) s_x^2 + (m-1) s_y^2}{n+m-1} + \frac{nm(\bar x - \bar y)^2}{(n+m)(n+m-1)}.$$ - results[group][stderr] = ( - (total_size - 1) * results[group][stderr] - + (current_size - 1) * var_score - ) / ( - total_size + current_size - 1 - ) + total_size * current_size / ( - (total_size + current_size) - * (total_size + current_size - 1) - ) * ( - results[group][metric] - metric_score - ) ** 2 + if var_score == "N/A": + results[group][stderr] = "N/A" + else: + results[group][stderr] = ( + (total_size - 1) * results[group][stderr] + + (current_size - 1) * var_score + ) / ( + total_size + current_size - 1 + ) + total_size * current_size / ( + (total_size + current_size) + * (total_size + current_size - 1) + ) * ( + results[group][metric] - metric_score + ) ** 2 else: results[group][metric] = metric_score results[group][stderr] = var_score -- GitLab From 5096772548ce1e72f401e3d80f771bfe0ee11ae2 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 13 Dec 2023 11:28:18 +0000 Subject: [PATCH 41/50] fixed style --- .../mmlu/alternative_worlds/mmlu_output_variation.yaml | 2 +- lm_eval/tasks/mmlu/alternative_worlds/styles.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml index 
3564b3a3..28c5caa9 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml +++ b/lm_eval/tasks/mmlu/alternative_worlds/mmlu_output_variation.yaml @@ -7,4 +7,4 @@ task: - mmlu_alt_ov_05 - mmlu_alt_ov_06 - mmlu_alt_ov_07 - - mmlu_alt_ov_08 \ No newline at end of file + - mmlu_alt_ov_08 diff --git a/lm_eval/tasks/mmlu/alternative_worlds/styles.py b/lm_eval/tasks/mmlu/alternative_worlds/styles.py index b64d5f46..c7a33ea0 100644 --- a/lm_eval/tasks/mmlu/alternative_worlds/styles.py +++ b/lm_eval/tasks/mmlu/alternative_worlds/styles.py @@ -4,7 +4,7 @@ from functools import partial def doc_to_text_base(alphabet, style, doc): - choices = doc["choices"]["text"] + choices = doc["choices"] num = len(choices) letter_list = [style.format(letter) for letter in alphabet[0:num]] @@ -26,13 +26,13 @@ def doc_to_text_base(alphabet, style, doc): # Full continuation def choice_A(doc): - return doc["choices"]["text"] + return doc["choices"] # Letters only def choice_B(alphabet, style, doc): - choices = doc["choices"]["text"] + choices = doc["choices"] num = len(choices) letter_list = [style.format(letter) for letter in alphabet[0:num]] @@ -45,7 +45,7 @@ def choice_B(alphabet, style, doc): # Letters + Full continuation def choice_C(alphabet, style, doc): - choices = doc["choices"]["text"] + choices = doc["choices"] num = len(choices) letter_list = [style.format(letter) for letter in alphabet[0:num]] -- GitLab From 4efa0b6dd99db9a8888dd7f83e77a1e831426750 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 15 Dec 2023 15:46:50 +0000 Subject: [PATCH 42/50] removed group and task from yaml template --- .../arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml index 633826a2..b17b217e 100644 --- 
a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml +++ b/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml @@ -1,6 +1,3 @@ -group: - - ai2_arc -task: arc_easy dataset_path: ai2_arc dataset_name: ARC-Easy output_type: multiple_choice -- GitLab From 386d63ea1d6d6c94b3a9f86e206cd5dcb2f88f51 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 15 Dec 2023 15:47:16 +0000 Subject: [PATCH 43/50] fixed brier_score to allow multi-gpu inference --- lm_eval/api/metrics.py | 3 +-- lm_eval/api/task.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py index 27304cae..98187ee4 100644 --- a/lm_eval/api/metrics.py +++ b/lm_eval/api/metrics.py @@ -126,8 +126,7 @@ def brier_score(items): # This is a passthrough function for g, p in zip(gold_group.values(), pred_group.values()): _p = np.array(p) _g = np.array(g) - _g_one_hot = np.eye(len(_p[0]))[_g] - average += np.mean(np.sum((_p - _g_one_hot) ** 2, axis=1)) * len(_g) + average += np.mean(np.sum((_p - _g) ** 2, axis=1)) * len(_g) total_size += len(_g) return average / total_size diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index 91fa7ac1..36f331b2 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1116,7 +1116,8 @@ class ConfigurableTask(Task): **({"acc_norm": acc_norm} if "acc_norm" in use_metric else {}), **({"exact_match": exact_match} if "exact_match" in use_metric else {}), **( - {"brier_score": (gold, prob_norm)} + # {"brier_score": (gold, prob_norm)} + {"brier_score": [np.eye(len(prob_norm))[gold], prob_norm]} if "brier_score" in use_metric else {} ), -- GitLab From c027bc92d9dd8702988c7599f619c68a594d7e0b Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Fri, 15 Dec 2023 15:47:59 +0000 Subject: [PATCH 44/50] format --- lm_eval/evaluator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lm_eval/evaluator.py b/lm_eval/evaluator.py index ad124f13..7d3dbc92 
100644 --- a/lm_eval/evaluator.py +++ b/lm_eval/evaluator.py @@ -487,9 +487,9 @@ def evaluate( # For unweighted averaging, use: # current_size = 1 - # TODO: Tasks like brier score for individual - # tasks have no stderr since the score is - # itself an aggregation. But it's possible to + # TODO: Tasks like brier score for individual + # tasks have no stderr since the score is + # itself an aggregation. But it's possible to # calculate the stderr over groups all_stderr = [] -- GitLab From f38c74698cd2ff6ec505f46acb1ada104f8d9afa Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 27 Dec 2023 06:43:45 +0000 Subject: [PATCH 45/50] split to easy and challenge --- .../{ => arc_challenge}/README.md | 0 .../output_variation/_arc_challenge_alt_yaml | 23 +++++ .../output_variation/arc_challenge_alt.yaml | 10 +++ .../output_variation/style_01/a.yaml | 6 ++ .../output_variation/style_01/b.yaml | 6 ++ .../output_variation/style_01/c.yaml | 6 ++ .../output_variation/style_02/a.yaml | 6 ++ .../output_variation/style_02/b.yaml | 6 ++ .../output_variation/style_02/c.yaml | 6 ++ .../output_variation/style_03/a.yaml | 6 ++ .../output_variation/style_03/b.yaml | 6 ++ .../output_variation/style_03/c.yaml | 6 ++ .../output_variation/style_04/a.yaml | 6 ++ .../output_variation/style_04/b.yaml | 6 ++ .../output_variation/style_04/c.yaml | 6 ++ .../output_variation/style_05/a.yaml | 6 ++ .../output_variation/style_05/b.yaml | 6 ++ .../output_variation/style_05/c.yaml | 6 ++ .../output_variation/style_06/a.yaml | 6 ++ .../output_variation/style_06/b.yaml | 6 ++ .../output_variation/style_06/c.yaml | 6 ++ .../output_variation/style_07/a.yaml | 6 ++ .../output_variation/style_07/b.yaml | 6 ++ .../output_variation/style_07/c.yaml | 6 ++ .../output_variation/style_08/a.yaml | 6 ++ .../output_variation/style_08/b.yaml | 6 ++ .../output_variation/style_08/c.yaml | 6 ++ .../output_variation/styles.py | 0 .../prompt_variation/_arc_challenge_alt_yaml | 21 +++++ 
.../prompt_variation/style_01.yaml | 5 ++ .../prompt_variation/style_02.yaml | 5 ++ .../prompt_variation/style_03.yaml | 5 ++ .../arc/alternative_worlds/arc_easy/README.md | 20 +++++ .../output_variation/_arc_easy_alt_yaml | 0 .../output_variation/arc_easy_alt.yaml | 0 .../output_variation/style_01/a.yaml | 0 .../output_variation/style_01/b.yaml | 0 .../output_variation/style_01/c.yaml | 0 .../output_variation/style_02/a.yaml | 0 .../output_variation/style_02/b.yaml | 0 .../output_variation/style_02/c.yaml | 0 .../output_variation/style_03/a.yaml | 0 .../output_variation/style_03/b.yaml | 0 .../output_variation/style_03/c.yaml | 0 .../output_variation/style_04/a.yaml | 0 .../output_variation/style_04/b.yaml | 0 .../output_variation/style_04/c.yaml | 0 .../output_variation/style_05/a.yaml | 0 .../output_variation/style_05/b.yaml | 0 .../output_variation/style_05/c.yaml | 0 .../output_variation/style_06/a.yaml | 0 .../output_variation/style_06/b.yaml | 0 .../output_variation/style_06/c.yaml | 0 .../output_variation/style_07/a.yaml | 0 .../output_variation/style_07/b.yaml | 0 .../output_variation/style_07/c.yaml | 0 .../output_variation/style_08/a.yaml | 0 .../output_variation/style_08/b.yaml | 0 .../output_variation/style_08/c.yaml | 0 .../arc_easy/output_variation/styles.py | 90 +++++++++++++++++++ .../prompt_variation/_arc_easy_alt_yaml | 0 .../prompt_variation/style_01.yaml | 0 .../prompt_variation/style_02.yaml | 0 .../prompt_variation/style_03.yaml | 0 64 files changed, 323 insertions(+) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_challenge}/README.md (100%) create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml create mode 100644 
lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml create mode 100644 
lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/c.yaml rename lm_eval/tasks/arc/alternative_worlds/{ => arc_challenge}/output_variation/styles.py (100%) create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/_arc_challenge_alt_yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/_arc_easy_alt_yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/arc_easy_alt.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_01/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_01/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_01/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_02/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_02/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_02/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_03/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_03/b.yaml (100%) rename 
lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_03/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_04/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_04/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_04/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_05/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_05/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_05/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_06/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_06/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_06/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_07/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_07/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_07/c.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_08/a.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_08/b.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/output_variation/style_08/c.yaml (100%) create mode 100644 lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/prompt_variation/_arc_easy_alt_yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/prompt_variation/style_01.yaml (100%) rename lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/prompt_variation/style_02.yaml (100%) rename 
lm_eval/tasks/arc/alternative_worlds/{ => arc_easy}/prompt_variation/style_03.yaml (100%) diff --git a/lm_eval/tasks/arc/alternative_worlds/README.md b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/README.md similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/README.md rename to lm_eval/tasks/arc/alternative_worlds/arc_challenge/README.md diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml new file mode 100644 index 00000000..6266a6fa --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/_arc_challenge_alt_yaml @@ -0,0 +1,23 @@ +group: + - ai2_arc +dataset_path: ai2_arc +dataset_name: ARC-Challenge +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml new file mode 100644 index 00000000..128fec5f --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/arc_challenge_alt.yaml @@ -0,0 +1,10 @@ +group: arc_challenge_alt_ov +task: + - arc_challenge_alt_ov_01 + - arc_challenge_alt_ov_02 + - arc_challenge_alt_ov_03 + - arc_challenge_alt_ov_04 + - arc_challenge_alt_ov_05 + - arc_challenge_alt_ov_06 + - arc_challenge_alt_ov_07 + - arc_challenge_alt_ov_08 diff 
--git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml new file mode 100644 index 00000000..e2fe0438 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml new file mode 100644 index 00000000..f99170f6 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml new file mode 100644 index 00000000..f985df9e --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_01 +task: arc_challenge_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml new 
file mode 100644 index 00000000..b4ef10d2 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml new file mode 100644 index 00000000..05066f22 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml new file mode 100644 index 00000000..ce04fda3 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_02 +task: arc_challenge_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml new file mode 100644 index 00000000..864a7912 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: 
../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml new file mode 100644 index 00000000..c9c2c9d9 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml new file mode 100644 index 00000000..4e9d0397 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_03 +task: arc_challenge_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml new file mode 100644 index 00000000..8061081d --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function 
../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml new file mode 100644 index 00000000..91c868a8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml new file mode 100644 index 00000000..98581583 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_04 +task: arc_challenge_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml new file mode 100644 index 00000000..04fccae4 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml 
b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml new file mode 100644 index 00000000..eb2a6419 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml new file mode 100644 index 00000000..757f1c25 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_05 +task: arc_challenge_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml new file mode 100644 index 00000000..c3f2535e --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml new file mode 100644 index 00000000..d36f1f1a --- /dev/null +++ 
b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml new file mode 100644 index 00000000..fcbfd04c --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_06 +task: arc_challenge_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml new file mode 100644 index 00000000..04f6e742 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: arc_challenge_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml new file mode 100644 index 00000000..1817632a --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: 
arc_challenge_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml new file mode 100644 index 00000000..66c376a5 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_07 +task: arc_challenge_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml new file mode 100644 index 00000000..da9fe33d --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_08 +task: arc_challenge_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml new file mode 100644 index 00000000..8fc7c7e8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_08 +task: arc_challenge_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 
diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/c.yaml new file mode 100644 index 00000000..9a12a0b2 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_arc_challenge_alt_yaml +group: arc_challenge_alt_ov_08 +task: arc_challenge_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/styles.py similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/styles.py rename to lm_eval/tasks/arc/alternative_worlds/arc_challenge/output_variation/styles.py diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/_arc_challenge_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/_arc_challenge_alt_yaml new file mode 100644 index 00000000..e8ab9268 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/_arc_challenge_alt_yaml @@ -0,0 +1,21 @@ +dataset_path: ai2_arc +dataset_name: ARC-Challenge +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: test +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_target: "{{choices.label.index(answerKey)}}" +doc_to_choice: "{{choices.text}}" +should_decontaminate: true +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git 
a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_01.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_01.yaml new file mode 100644 index 00000000..e113e7c8 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_01 +doc_to_text: "{{question}}" +doc_to_decontamination_query: "{{question}}" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml new file mode 100644 index 00000000..c808c137 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_02 +doc_to_text: "Q: {{question}}\nA:" +doc_to_decontamination_query: "Q: {{question}}\nA:" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml new file mode 100644 index 00000000..657ed98b --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_challenge/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _arc_challenge_alt_yaml +group: arc_challenge_alt_pv +task: arc_challenge_alt_pv_03 +doc_to_text: "Question: {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{question}}\nAnswer:" diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md b/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md new file mode 100644 index 00000000..93600ae1 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/_arc_easy_alt_yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/_arc_easy_alt_yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/_arc_easy_alt_yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/arc_easy_alt.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/arc_easy_alt.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/arc_easy_alt.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_01/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_01/c.yaml diff --git 
a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_02/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_02/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/c.yaml similarity index 100% rename from 
lm_eval/tasks/arc/alternative_worlds/output_variation/style_03/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_03/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_04/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_04/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/b.yaml diff --git 
a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_05/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_05/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_06/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_06/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/b.yaml similarity index 100% rename from 
lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_07/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_07/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/a.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/a.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/a.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/b.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/b.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/b.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/c.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/output_variation/style_08/c.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/style_08/c.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py new file mode 100644 index 00000000..a4e61348 --- /dev/null +++ b/lm_eval/tasks/arc/alternative_worlds/arc_easy/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): 
+ + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + "Question: " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return doc["choices"]["text"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = doc["choices"]["text"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, 
string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/_arc_easy_alt_yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/prompt_variation/_arc_easy_alt_yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/_arc_easy_alt_yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_01.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_01.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_01.yaml diff --git a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_02.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_02.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_02.yaml diff --git 
a/lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_03.yaml similarity index 100% rename from lm_eval/tasks/arc/alternative_worlds/prompt_variation/style_03.yaml rename to lm_eval/tasks/arc/alternative_worlds/arc_easy/prompt_variation/style_03.yaml -- GitLab From 4277840b5a5a1734b0560143ea947795fa062bf5 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 27 Dec 2023 06:44:00 +0000 Subject: [PATCH 46/50] add piqa --- .../tasks/piqa/alternative_worlds/README.md | 20 +++++ .../output_variation/_piqa_alt_ov_yaml | 24 +++++ .../output_variation/piqa_alt_ov.yaml | 10 +++ .../output_variation/style_01/a.yaml | 6 ++ .../output_variation/style_01/b.yaml | 6 ++ .../output_variation/style_01/c.yaml | 6 ++ .../output_variation/style_02/a.yaml | 6 ++ .../output_variation/style_02/b.yaml | 6 ++ .../output_variation/style_02/c.yaml | 6 ++ .../output_variation/style_03/a.yaml | 6 ++ .../output_variation/style_03/b.yaml | 6 ++ .../output_variation/style_03/c.yaml | 6 ++ .../output_variation/style_04/a.yaml | 6 ++ .../output_variation/style_04/b.yaml | 6 ++ .../output_variation/style_04/c.yaml | 6 ++ .../output_variation/style_05/a.yaml | 6 ++ .../output_variation/style_05/b.yaml | 6 ++ .../output_variation/style_05/c.yaml | 6 ++ .../output_variation/style_06/a.yaml | 6 ++ .../output_variation/style_06/b.yaml | 6 ++ .../output_variation/style_06/c.yaml | 6 ++ .../output_variation/style_07/a.yaml | 6 ++ .../output_variation/style_07/b.yaml | 6 ++ .../output_variation/style_07/c.yaml | 6 ++ .../output_variation/style_08/a.yaml | 6 ++ .../output_variation/style_08/b.yaml | 6 ++ .../output_variation/style_08/c.yaml | 6 ++ .../output_variation/styles.py | 90 +++++++++++++++++++ .../prompt_variation/_piqa_yaml | 47 ++++++++++ .../prompt_variation/style_01.yaml | 5 ++ .../prompt_variation/style_02.yaml | 5 ++ .../prompt_variation/style_03.yaml | 5 ++ 32 files changed, 350 insertions(+) create 
mode 100644 lm_eval/tasks/piqa/alternative_worlds/README.md create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml create mode 100644 
lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py create mode 100644 lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/piqa/alternative_worlds/README.md b/lm_eval/tasks/piqa/alternative_worlds/README.md new file mode 100644 index 00000000..93600ae1 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a. 
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml new file mode 100644 index 00000000..442dd076 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml @@ -0,0 +1,24 @@ +group: + - ai2_arc +task: piqa +dataset_path: piqa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: null +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml new file mode 100644 index 00000000..da917426 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/piqa_alt_ov.yaml @@ -0,0 +1,10 @@ +group: piqa_alt_ov +task: + - piqa_alt_ov_01 + - piqa_alt_ov_02 + - piqa_alt_ov_03 + - piqa_alt_ov_04 + - piqa_alt_ov_05 + - piqa_alt_ov_06 + - piqa_alt_ov_07 + - piqa_alt_ov_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 00000000..a19793a7 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01a +doc_to_text: 
!function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 00000000..6ae4b304 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 00000000..84cbd654 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_01 +task: piqa_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 00000000..ad48ca78 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 00000000..a7c2b84a --- /dev/null +++ 
b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 00000000..e2f45ff9 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_02 +task: piqa_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 00000000..70a5ec8c --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 00000000..af73914b --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git 
a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 00000000..3484705f --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_03 +task: piqa_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 00000000..79a36448 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: piqa_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 00000000..36b3347c --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: piqa_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 00000000..3e62b456 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_04 +task: 
piqa_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 00000000..54c4a980 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 00000000..2954cc53 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 00000000..b604e1d2 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_05 +task: piqa_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 
00000000..d0bc222d --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 00000000..393db4a0 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 00000000..2be7bf0d --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_06 +task: piqa_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 00000000..4a95f9e8 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git 
a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 00000000..b6bc0432 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 00000000..113fb52b --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_07 +task: piqa_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 00000000..7f56c485 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: piqa_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 00000000..4458cd50 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: 
piqa_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 00000000..01e77487 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_piqa_alt_ov_yaml +group: piqa_alt_ov_08 +task: piqa_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py new file mode 100644 index 00000000..96469b76 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + "Question: " + doc["goal"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return [doc["sol1"], doc["sol2"]] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = [doc["sol1"], doc["sol2"]] + num = 
len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, 
string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml new file mode 100644 index 00000000..48a21e76 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml @@ -0,0 +1,47 @@ +# dataset_path: ai2_arc +# dataset_name: ARC-Easy +# output_type: multiple_choice +# training_split: train +# validation_split: validation +# test_split: test +# doc_to_text: "Question: {{question}}\nAnswer:" +# doc_to_target: "{{choices.label.index(answerKey)}}" +# doc_to_choice: "{{choices.text}}" +# should_decontaminate: true +# doc_to_decontamination_query: "Question: {{question}}\nAnswer:" +# metric_list: +# - metric: acc +# aggregation: mean +# higher_is_better: true +# - metric: acc_norm +# aggregation: mean +# higher_is_better: true +# - metric: brier_score +# aggregation: brier_score +# higher_is_better: false + + + +dataset_path: piqa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +test_split: null +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" +should_decontaminate: true +doc_to_decontamination_query: goal +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 00000000..563c551d --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: piqa_alt_pv +task: piqa_alt_pv_01 +doc_to_text: "{{goal}}" +doc_to_decontamination_query: "{{goal}}" diff --git 
a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..32e1fe76 --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: piqa_alt_pv +task: piqa_alt_pv_02 +doc_to_text: "Q: {{goal}}\nA:" +doc_to_decontamination_query: "Q: {{goal}}\nA:" diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..7bb8bfef --- /dev/null +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: piqa_alt_pv +task: piqa_alt_pv_03 +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_decontamination_query: "Question: {{goal}}\nAnswer:" -- GitLab From 4654353810b883f8f2a73adbbe484a17a4cb49b1 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 27 Dec 2023 06:44:16 +0000 Subject: [PATCH 47/50] add siqa --- .../tasks/siqa/alternative_worlds/README.md | 20 +++++ .../output_variation/_siqa_alt_yaml | 19 ++++ .../output_variation/siqa_alt.yaml | 10 +++ .../output_variation/style_01/a.yaml | 6 ++ .../output_variation/style_01/b.yaml | 6 ++ .../output_variation/style_01/c.yaml | 6 ++ .../output_variation/style_02/a.yaml | 6 ++ .../output_variation/style_02/b.yaml | 6 ++ .../output_variation/style_02/c.yaml | 6 ++ .../output_variation/style_03/a.yaml | 6 ++ .../output_variation/style_03/b.yaml | 6 ++ .../output_variation/style_03/c.yaml | 6 ++ .../output_variation/style_04/a.yaml | 6 ++ .../output_variation/style_04/b.yaml | 6 ++ .../output_variation/style_04/c.yaml | 6 ++ .../output_variation/style_05/a.yaml | 6 ++ .../output_variation/style_05/b.yaml | 6 ++ .../output_variation/style_05/c.yaml | 6 ++ .../output_variation/style_06/a.yaml | 6 ++ .../output_variation/style_06/b.yaml | 6 ++ 
.../output_variation/style_06/c.yaml | 6 ++ .../output_variation/style_07/a.yaml | 6 ++ .../output_variation/style_07/b.yaml | 6 ++ .../output_variation/style_07/c.yaml | 6 ++ .../output_variation/style_08/a.yaml | 6 ++ .../output_variation/style_08/b.yaml | 6 ++ .../output_variation/style_08/c.yaml | 6 ++ .../output_variation/styles.py | 90 +++++++++++++++++++ .../prompt_variation/_siqa_alt_yaml | 19 ++++ .../prompt_variation/style_01.yaml | 5 ++ .../prompt_variation/style_02.yaml | 5 ++ .../prompt_variation/style_03.yaml | 5 ++ 32 files changed, 317 insertions(+) create mode 100644 lm_eval/tasks/siqa/alternative_worlds/README.md create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml create mode 
100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py create mode 100644 lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/siqa/alternative_worlds/README.md b/lm_eval/tasks/siqa/alternative_worlds/README.md new file mode 100644 index 00000000..93600ae1 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a.
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml new file mode 100644 index 00000000..157c7389 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/_siqa_alt_yaml @@ -0,0 +1,19 @@ +task: social_iqa +dataset_path: social_i_qa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"] +doc_to_target: "{{ (label|int) - 1 }}" +should_decontaminate: true +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml new file mode 100644 index 00000000..4d7079ce --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/siqa_alt.yaml @@ -0,0 +1,10 @@ +group: siqa_alt_ov +task: + - siqa_alt_ov_01 + - siqa_alt_ov_02 + - siqa_alt_ov_03 + - siqa_alt_ov_04 + - siqa_alt_ov_05 + - siqa_alt_ov_06 + - siqa_alt_ov_07 + - siqa_alt_ov_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 00000000..9d791740 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git
a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 00000000..b942970f --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 00000000..a4bb701d --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_01 +task: siqa_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 00000000..94477b97 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 00000000..a57d7d15 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02b 
+doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 00000000..5a47424a --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_02 +task: siqa_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 00000000..5eeabdd3 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 00000000..0b39ea71 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 00000000..cf97679a --- /dev/null +++ 
b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_03 +task: siqa_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 00000000..251cf948 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 00000000..ea9619a7 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 00000000..8cacf98b --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_04 +task: siqa_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git 
a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 00000000..4753ad0b --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 00000000..0ae84f2d --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 00000000..fd47fb6b --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_05 +task: siqa_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 00000000..12774974 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06a 
+doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 00000000..30cbf0ef --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 00000000..f309834d --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_06 +task: siqa_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 00000000..5cc468f9 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 00000000..e7de1319 --- /dev/null +++ 
b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 00000000..b0904d14 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_07 +task: siqa_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 00000000..0d971d4e --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 00000000..32151413 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git 
a/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 00000000..10b32b01 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_siqa_alt_yaml +group: siqa_alt_ov_08 +task: siqa_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py new file mode 100644 index 00000000..ee263cc5 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/output_variation/styles.py @@ -0,0 +1,90 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [ + "Question: " + doc["context"] + " " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return [doc["answerA"], doc["answerB"], doc["answerC"]] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = [doc["answerA"], doc["answerB"], doc["answerC"]] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t"
not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 = partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git 
a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml new file mode 100644 index 00000000..157c7389 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/_siqa_alt_yaml @@ -0,0 +1,19 @@ +task: social_iqa +dataset_path: social_i_qa +dataset_name: null +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_choice: ["{{answerA}}", "{{answerB}}", "{{answerC}}"] +doc_to_target: "{{ (label|int) - 1 }}" +should_decontaminate: true +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 00000000..81cc8595 --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_01 +doc_to_text: "{{context}} {{question}}" +doc_to_decontamination_query: "{{context}} {{question}}" diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..287bd5fd --- /dev/null +++ b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_02 +doc_to_text: "Q: {{context}} {{question}}\nA:" +doc_to_decontamination_query: "Q: {{context}} {{question}}\nA:" diff --git a/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..76d62c4c --- /dev/null +++
b/lm_eval/tasks/siqa/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _siqa_alt_yaml +group: siqa_alt_pv +task: siqa_alt_pv_03 +doc_to_text: "Question: {{context}} {{question}}\nAnswer:" +doc_to_decontamination_query: "Question: {{context}} {{question}}\nAnswer:" -- GitLab From 379bb7eba6cbc04d3637739e132a0b66ae0d1379 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 27 Dec 2023 07:00:29 +0000 Subject: [PATCH 48/50] add boolq --- .../boolq/alternative_worlds/README.md | 20 ++++ .../output_variation/_boolq_alt_ov_yaml | 21 +++++ .../output_variation/boolq_ov.yaml | 10 ++ .../output_variation/style_01/a.yaml | 6 ++ .../output_variation/style_01/b.yaml | 6 ++ .../output_variation/style_01/c.yaml | 6 ++ .../output_variation/style_02/a.yaml | 6 ++ .../output_variation/style_02/b.yaml | 6 ++ .../output_variation/style_02/c.yaml | 6 ++ .../output_variation/style_03/a.yaml | 6 ++ .../output_variation/style_03/b.yaml | 6 ++ .../output_variation/style_03/c.yaml | 6 ++ .../output_variation/style_04/a.yaml | 6 ++ .../output_variation/style_04/b.yaml | 6 ++ .../output_variation/style_04/c.yaml | 6 ++ .../output_variation/style_05/a.yaml | 6 ++ .../output_variation/style_05/b.yaml | 6 ++ .../output_variation/style_05/c.yaml | 6 ++ .../output_variation/style_06/a.yaml | 6 ++ .../output_variation/style_06/b.yaml | 6 ++ .../output_variation/style_06/c.yaml | 6 ++ .../output_variation/style_07/a.yaml | 6 ++ .../output_variation/style_07/b.yaml | 6 ++ .../output_variation/style_07/c.yaml | 6 ++ .../output_variation/style_08/a.yaml | 6 ++ .../output_variation/style_08/b.yaml | 6 ++ .../output_variation/style_08/c.yaml | 6 ++ .../output_variation/styles.py | 91 +++++++++++++++++++ .../prompt_variation/_piqa_yaml | 21 +++++ .../prompt_variation/style_01.yaml | 5 + .../prompt_variation/style_02.yaml | 5 + .../prompt_variation/style_03.yaml | 5 + 32 files changed, 322 insertions(+) create mode 100644 
lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml 
create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_piqa_yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml create mode 100644 lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md b/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md new file mode 100644 index 00000000..93600ae1 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/README.md @@ -0,0 +1,20 @@ + + +Investigate effect of letter options +- (A) +- A) +- A. +- A\t +- (a) +- a) +- a.
+- a\t + +Answer types: +- letters only + - original option + - just letter +- letters + continuation + - original option + - just letter +- continuation diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml new file mode 100644 index 00000000..f2daf214 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/_boolq_alt_ov_yaml @@ -0,0 +1,22 @@ +dataset_path: super_glue +dataset_name: boolq +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_target: label +doc_to_choice: ["no", "yes"] +should_decontaminate: true +doc_to_decontamination_query: passage +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml new file mode 100644 index 00000000..a887cf17 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/boolq_ov.yaml @@ -0,0 +1,10 @@ +group: boolq_alt_ov +task: + - boolq_alt_ov_01 + - boolq_alt_ov_02 + - boolq_alt_ov_03 + - boolq_alt_ov_04 + - boolq_alt_ov_05 + - boolq_alt_ov_06 + - boolq_alt_ov_07 + - boolq_alt_ov_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml new file mode 100644 index 00000000..be017f32 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task:
boolq_alt_ov_01a +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01a +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml new file mode 100644 index 00000000..b8c498a2 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task: boolq_alt_ov_01b +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01b +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml new file mode 100644 index 00000000..886a7597 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_01/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_01 +task: boolq_alt_ov_01c +doc_to_text: !function ../styles.template_01 +doc_to_choice: !function ../styles.choice_01c +doc_to_decontamination_query: !function ../styles.template_01 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml new file mode 100644 index 00000000..26575550 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02a +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02a +doc_to_decontamination_query: !function ../styles.template_02 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml new file mode 100644 index 00000000..8cf9122e --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02b +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02b +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml new file mode 100644 index 00000000..336f9e99 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_02/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_02 +task: boolq_alt_ov_02c +doc_to_text: !function ../styles.template_02 +doc_to_choice: !function ../styles.choice_02c +doc_to_decontamination_query: !function ../styles.template_02 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml new file mode 100644 index 00000000..3254a128 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03a +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03a +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml new file mode 100644 index 00000000..683b9c3c --- /dev/null +++ 
b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03b +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03b +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml new file mode 100644 index 00000000..63fbec39 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_03/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_03 +task: boolq_alt_ov_03c +doc_to_text: !function ../styles.template_03 +doc_to_choice: !function ../styles.choice_03c +doc_to_decontamination_query: !function ../styles.template_03 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml new file mode 100644 index 00000000..b7a88f2e --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04a +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04a +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml new file mode 100644 index 00000000..dddc7468 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04b +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function 
../styles.choice_04b +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml new file mode 100644 index 00000000..ce0c4aac --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_04/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_04 +task: boolq_alt_ov_04c +doc_to_text: !function ../styles.template_04 +doc_to_choice: !function ../styles.choice_04c +doc_to_decontamination_query: !function ../styles.template_04 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml new file mode 100644 index 00000000..42629c27 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05a +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05a +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml new file mode 100644 index 00000000..9e231f53 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05b +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05b +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml 
b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml new file mode 100644 index 00000000..44594c91 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_05/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_05 +task: boolq_alt_ov_05c +doc_to_text: !function ../styles.template_05 +doc_to_choice: !function ../styles.choice_05c +doc_to_decontamination_query: !function ../styles.template_05 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml new file mode 100644 index 00000000..8f3efdf4 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06a +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06a +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml new file mode 100644 index 00000000..6602bad3 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06b +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06b +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml new file mode 100644 index 00000000..7bc23a3f --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_06/c.yaml @@ -0,0 +1,6 @@ 
+include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_06 +task: boolq_alt_ov_06c +doc_to_text: !function ../styles.template_06 +doc_to_choice: !function ../styles.choice_06c +doc_to_decontamination_query: !function ../styles.template_06 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml new file mode 100644 index 00000000..6b1aa481 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07a +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07a +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml new file mode 100644 index 00000000..75fe17ef --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07b +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07b +doc_to_decontamination_query: !function ../styles.template_07 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml new file mode 100644 index 00000000..92d5b264 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_07/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_07 +task: boolq_alt_ov_07c +doc_to_text: !function ../styles.template_07 +doc_to_choice: !function ../styles.choice_07c +doc_to_decontamination_query: !function ../styles.template_07 diff --git 
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml new file mode 100644 index 00000000..0d5c29ea --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/a.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08a +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08a +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml new file mode 100644 index 00000000..0b7a0967 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/b.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08b +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08b +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml new file mode 100644 index 00000000..f84b105a --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/style_08/c.yaml @@ -0,0 +1,6 @@ +include: ../_boolq_alt_ov_yaml +group: boolq_alt_ov_08 +task: boolq_alt_ov_08c +doc_to_text: !function ../styles.template_08 +doc_to_choice: !function ../styles.choice_08c +doc_to_decontamination_query: !function ../styles.template_08 diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py new file mode 100644 index 00000000..6dd52059 --- /dev/null +++ 
b/lm_eval/tasks/super_glue/boolq/alternative_worlds/output_variation/styles.py @@ -0,0 +1,91 @@ +import string +from functools import partial + + +def doc_to_text_base(alphabet, style, doc): + + choices = ["no", "yes"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + + if "\t" in style: + choice_string = "{}{}" + else: + choice_string = "{} {}" + + doc_to_text = "\n".join( + [doc["passage"]] + + [ + "Question: " + doc["question"], + ] + + [choice_string.format(i, j) for i, j in zip(letter_list, choices)] + + ["Answer:"] + ) + + return doc_to_text + + +# Full continuation +def choice_A(doc): + return ["no", "yes"] + + +# Letters only +def choice_B(alphabet, style, doc): + + choices = ["no", "yes"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" in style: + letter_list = [letter.replace("\t", "") for letter in letter_list] + + return letter_list + + +# Letters + Full continuation +def choice_C(alphabet, style, doc): + + choices = ["no", "yes"] + num = len(choices) + + letter_list = [style.format(letter) for letter in alphabet[0:num]] + if "\t" not in style: + letter_list = [letter + " " for letter in letter_list] + + return [letter + choice for letter, choice in zip(letter_list, choices)] + + +template_01 = partial(doc_to_text_base, string.ascii_lowercase, "({})") +choice_01a = choice_A +choice_01b = partial(choice_B, string.ascii_lowercase, "({})") +choice_01c = partial(choice_C, string.ascii_lowercase, "({})") +template_02 = partial(doc_to_text_base, string.ascii_lowercase, "{})") +choice_02a = choice_A +choice_02b = partial(choice_B, string.ascii_lowercase, "{})") +choice_02c = partial(choice_C, string.ascii_lowercase, "{})") +template_03 = partial(doc_to_text_base, string.ascii_lowercase, "{}.") +choice_03a = choice_A +choice_03b = partial(choice_B, string.ascii_lowercase, "{}.") +choice_03c = partial(choice_C, string.ascii_lowercase, "{}.") +template_04 =
partial(doc_to_text_base, string.ascii_lowercase, "{}\t") +choice_04a = choice_A +choice_04b = partial(choice_B, string.ascii_lowercase, "{}\t") +choice_04c = partial(choice_C, string.ascii_lowercase, "{}\t") +template_05 = partial(doc_to_text_base, string.ascii_uppercase, "({})") +choice_05a = choice_A +choice_05b = partial(choice_B, string.ascii_uppercase, "({})") +choice_05c = partial(choice_C, string.ascii_uppercase, "({})") +template_06 = partial(doc_to_text_base, string.ascii_uppercase, "{})") +choice_06a = choice_A +choice_06b = partial(choice_B, string.ascii_uppercase, "{})") +choice_06c = partial(choice_C, string.ascii_uppercase, "{})") +template_07 = partial(doc_to_text_base, string.ascii_uppercase, "{}.") +choice_07a = choice_A +choice_07b = partial(choice_B, string.ascii_uppercase, "{}.") +choice_07c = partial(choice_C, string.ascii_uppercase, "{}.") +template_08 = partial(doc_to_text_base, string.ascii_uppercase, "{}\t") +choice_08a = choice_A +choice_08b = partial(choice_B, string.ascii_uppercase, "{}\t") +choice_08c = partial(choice_C, string.ascii_uppercase, "{}\t") diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_piqa_yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_piqa_yaml new file mode 100644 index 00000000..f2daf214 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/_piqa_yaml @@ -0,0 +1,22 @@ +dataset_path: super_glue +dataset_name: boolq +output_type: multiple_choice +training_split: train +validation_split: validation +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_target: label +doc_to_choice: ["no", "yes"] +should_decontaminate: true +doc_to_decontamination_query: passage +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true + - metric: brier_score + aggregation: brier_score + higher_is_better: false +metadata: + - version: 1.0 diff --git
a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml new file mode 100644 index 00000000..3b8d1960 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_01.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: boolq_alt_pv +task: boolq_alt_pv_01 +doc_to_text: "{{passage}}\n{{question}}?" +doc_to_decontamination_query: "{{passage}}\n{{question}}?" diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml new file mode 100644 index 00000000..1cd464f1 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_02.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: boolq_alt_pv +task: boolq_alt_pv_02 +doc_to_text: "{{passage}}\nQ: {{question}}?\nA:" +doc_to_decontamination_query: "{{passage}}\nQ: {{question}}?\nA:" diff --git a/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml new file mode 100644 index 00000000..9e36afd8 --- /dev/null +++ b/lm_eval/tasks/super_glue/boolq/alternative_worlds/prompt_variation/style_03.yaml @@ -0,0 +1,5 @@ +include: _piqa_yaml +group: boolq_alt_pv +task: boolq_alt_pv_03 +doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" +doc_to_decontamination_query: "{{passage}}\nQuestion: {{question}}?\nAnswer:" -- GitLab From aa44be3f6873f31feb32954624434471400e3aa2 Mon Sep 17 00:00:00 2001 From: lintangsutawika Date: Wed, 27 Dec 2023 12:06:47 +0000 Subject: [PATCH 49/50] fixed piqa ov --- .../output_variation/_piqa_alt_ov_yaml | 16 +++++------- .../output_variation/styles.py | 2 +- .../prompt_variation/_piqa_yaml | 25 ------------------- 3 files changed, 7 insertions(+), 36 deletions(-) diff --git
a/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml index 442dd076..d26d2ae3 100644 --- a/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/_piqa_alt_ov_yaml @@ -1,17 +1,11 @@ -group: - - ai2_arc -task: piqa -dataset_path: ai2_arc -dataset_name: ARC-Easy +dataset_path: piqa output_type: multiple_choice training_split: train validation_split: validation -test_split: test -doc_to_text: "Question: {{question}}\nAnswer:" -doc_to_target: "{{choices.label.index(answerKey)}}" -doc_to_choice: "{{choices.text}}" +doc_to_text: "Question: {{goal}}\nAnswer:" +doc_to_target: label +doc_to_choice: "{{[sol1, sol2]}}" should_decontaminate: true -doc_to_decontamination_query: "Question: {{question}}\nAnswer:" metric_list: - metric: acc aggregation: mean @@ -22,3 +16,5 @@ metric_list: - metric: brier_score aggregation: brier_score higher_is_better: false +metadata: + - version: 1.0 diff --git a/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py index 96469b76..68447e2d 100644 --- a/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py +++ b/lm_eval/tasks/piqa/alternative_worlds/output_variation/styles.py @@ -27,7 +27,7 @@ def doc_to_text_base(alphabet, style, doc): # Full continuation def choice_A(doc): - return doc["choices"]["text"] + return [doc["sol1"], doc["sol2"]] # Letters only diff --git a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml index 48a21e76..eb1580c7 100644 --- a/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml +++ b/lm_eval/tasks/piqa/alternative_worlds/prompt_variation/_piqa_yaml @@ -1,33 +1,8 @@ -# dataset_path: ai2_arc -# dataset_name: ARC-Easy -# output_type: multiple_choice -# 
training_split: train -# validation_split: validation -# test_split: test -# doc_to_text: "Question: {{question}}\nAnswer:" -# doc_to_target: "{{choices.label.index(answerKey)}}" -# doc_to_choice: "{{choices.text}}" -# should_decontaminate: true -# doc_to_decontamination_query: "Question: {{question}}\nAnswer:" -# metric_list: -# - metric: acc -# aggregation: mean -# higher_is_better: true -# - metric: acc_norm -# aggregation: mean -# higher_is_better: true -# - metric: brier_score -# aggregation: brier_score -# higher_is_better: false - - - dataset_path: piqa dataset_name: null output_type: multiple_choice training_split: train validation_split: validation -test_split: null doc_to_text: "Question: {{goal}}\nAnswer:" doc_to_target: label doc_to_choice: "{{[sol1, sol2]}}" -- GitLab From ada4a31d69fd58ce794b60944900bd1471fbad3e Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 16 Jan 2024 14:10:17 -0800 Subject: [PATCH 50/50] Update README.md with custom integration doc (#1298) * Update README.md * punctuation --------- Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e042ad86..91f07b65 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,8 @@ It is on our roadmap to create task variants designed to enable models which do A number of other libraries contain scripts for calling the eval harness through their library. These include [GPT-NeoX](https://github.com/EleutherAI/gpt-neox/blob/main/eval_tasks/eval_adapter.py), [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/blob/main/examples/MoE/readme_evalharness.md), and [mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax/blob/master/eval_harness.py). +To create your own custom integration you can follow instructions from [this tutorial](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage). 
+ ### Additional Features If you have a Metal compatible Mac, you can run the eval harness using the MPS back-end by replacing `--device cuda:0` with `--device mps` (requires PyTorch version 2.1 or higher). -- GitLab