From a5c344cf5c48ef70ce7a2edc311e66bdaf7a1ed8 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Tue, 21 Jan 2025 05:38:38 +0900 Subject: [PATCH 01/32] add hrm8k benchmark for both Korean and English (#2627) * add hrm8k benchmark for both Korean and English * apply precommit * revise tasks to make models not to directly answer; use zeroshot_cot if possible * add README * Add hrm8k on the task-list --------- Co-authored-by: Baber --- lm_eval/tasks/README.md | 259 ++++++++-------- lm_eval/tasks/hrm8k/README.md | 46 +++ lm_eval/tasks/hrm8k/default/_hrm8k_yaml | 22 ++ lm_eval/tasks/hrm8k/default/hrm8k.yaml | 13 + lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_math.yaml | 3 + lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml | 4 + .../tasks/hrm8k/default/hrm8k_omni_math.yaml | 3 + lm_eval/tasks/hrm8k/default/utils.py | 285 ++++++++++++++++++ lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml | 22 ++ lm_eval/tasks/hrm8k/en/hrm8k_en.yaml | 13 + lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml | 3 + lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml | 4 + .../tasks/hrm8k/en/hrm8k_omni_math_en.yaml | 3 + lm_eval/tasks/hrm8k/en/utils.py | 285 ++++++++++++++++++ 18 files changed, 848 insertions(+), 129 deletions(-) create mode 100644 lm_eval/tasks/hrm8k/README.md create mode 100644 lm_eval/tasks/hrm8k/default/_hrm8k_yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_math.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml create mode 100644 lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml create mode 100644 lm_eval/tasks/hrm8k/default/utils.py create mode 100644 lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml create mode 100644 
lm_eval/tasks/hrm8k/en/hrm8k_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml create mode 100644 lm_eval/tasks/hrm8k/en/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 8a9363a9..c92043bc 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -5,135 +5,136 @@ For more information, including a full list of task names and their precise meanings or sources, follow the links provided to the individual README.md files for each subfolder. -| Task Family | Description | Language(s) | -|-------------|-------------|-------------| -| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | -| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | -| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | -| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. | English | -| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. 
| Arabic (Some MT) | -| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | -| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | -| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | -| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | -| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | -| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | -| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | -| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | -| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. | English, German | -| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | -| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | -| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. 
| English, Basque, Basque (MT) | -| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | -| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | -| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | -| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | -| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | -| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | -| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | -| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | -| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | -| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | -| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | -| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. | English | -| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | -| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | -| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. 
| Basque | -| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | -| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | -| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | -| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | -| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French| -| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | -| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | -| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | -| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | -| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | -| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | -| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. | Spanish, English | -| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | -| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | -| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. 
| English | -| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | -| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | -| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | -| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | -| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | -| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | -| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | -| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | -| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | -| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | -| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | -| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. 
| German, English, Spanish, French, Italian, Dutch, Portuguese | -| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | -| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | -| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | -| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | -| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | +| Task Family | Description | Language(s) | +|-------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| +| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | +| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | +| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | +| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. | English | +| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. 
| Arabic (Some MT) | +| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | +| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | +| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | +| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | +| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | +| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | +| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | +| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | +| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. | English, German | +| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | +| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | +| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. 
| English, Basque, Basque (MT) | +| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | +| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | +| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | +| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | +| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | +| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | +| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | +| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | +| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | +| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | +| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | +| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. | English | +| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | +| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | +| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. 
| Basque | +| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | +| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | +| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | +| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | +| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | +| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | +| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | +| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | +| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | +| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. | Spanish, English | +| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | +| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | +| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. 
| English | +| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. | Korean (Some MT), English (Some MT) | +| [humaneval](humaneval/README.md) | Code generation task that measures functional correctness for synthesizing programs from docstrings. | Python | +| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | +| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | +| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | +| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | +| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | +| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | +| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | +| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | +| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | +| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | +| [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`.
| German, English, Spanish, French, Italian, Dutch, Portuguese | +| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). These tasks are static and will not change over time. | English | +| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization. | English, Multilingual | +| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | +| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | +| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | | [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | -| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | -| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | -| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | -| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | -| medqa | Multiple choice question answering based on the United States Medical License Exams. | | -| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems.
| Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | -| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | +| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | +| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts. | English | +| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | +| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | +| medqa | Multiple choice question answering based on the United States Medical License Exams. | | +| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | +| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | | [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | -| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | -| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | -| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | -| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns.
| | -| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | -| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | -| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | -| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | -| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | -| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | -| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | -| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | -| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | -| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. 
| English | -| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | -| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | -| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | -| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | -| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | -| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | -| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | -| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | -| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | -| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | -| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | -| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | -| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | -| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | -| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. 
| English | -| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | -| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | -| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | -| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | -| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | -| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | -| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | -| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | -| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | -| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | -| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | -| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | -| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. 
| English | -| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | -| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | -| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | -| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | -| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | -| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | -| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | +| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | +| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | +| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | +| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. 
| English | +| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | +| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | +| [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | +| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | +| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | +| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | +| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | +| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | +| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | +| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. 
| Portuguese | +| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | +| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | +| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | +| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | +| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | +| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | +| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | +| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | +| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | +| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | +| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | +| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | +| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | +| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | +| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. 
| English | +| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | +| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | +| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | +| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | +| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | +| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | +| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | +| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | +| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | +| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | +| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | +| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | +| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. 
| English | +| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | +| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | +| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | +| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | | [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | -| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | -| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | -| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | -| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | +| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. 
| Basque | +| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | +| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | +| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | diff --git a/lm_eval/tasks/hrm8k/README.md b/lm_eval/tasks/hrm8k/README.md new file mode 100644 index 00000000..cd5a1739 --- /dev/null +++ b/lm_eval/tasks/hrm8k/README.md @@ -0,0 +1,46 @@ +# HRM8K + +### Paper + +Title: [Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap](https://www.arxiv.org/abs/2501.02448) + +Large language models (LLMs) demonstrate exceptional performance on complex reasoning tasks. However, despite their strong reasoning capabilities in high-resource languages (e.g., English and Chinese), a significant performance gap persists in other languages. To investigate this gap in Korean, we introduce HRM8K, a benchmark comprising 8,011 English-Korean parallel bilingual math problems. Through systematic analysis of model behaviors, we identify a key finding: these performance disparities stem primarily from difficulties in comprehending non-English inputs, rather than limitations in reasoning capabilities. Based on these findings, we propose UST (Understand, Solve, and Translate), a method that strategically uses English as an anchor for reasoning and solution generation. By fine-tuning the model on 130k synthetically generated data points, UST achieves a 10.91% improvement on the HRM8K benchmark and reduces the multilingual performance gap from 11.6% to 0.7%. 
Additionally, we show that improvements from UST generalize effectively to different Korean domains, demonstrating that capabilities acquired from machine-verifiable content can be generalized to other areas. We publicly release the benchmark, training dataset, and models. + +Homepage: https://huggingface.co/datasets/HAERAE-HUB/HRM8K + + +### Citation + +``` +@article{ko2025understand, + title={Understand, Solve and Translate: Bridging the Multilingual Mathematical Reasoning Gap}, + author={Ko, Hyunwoo and Son, Guijin and Choi, Dasol}, + journal={arXiv preprint arXiv:2501.02448}, + year={2025} +} +``` + +### Groups and and Tasks + +#### Groups + +* `hrm8k`: HRM8K comprises 8,011 instances for evaluation, sourced through a combination of translations from established English benchmarks (e.g., GSM8K, MATH, OmniMath, MMMLU) and original problems curated from existing Korean math exams. This benchmark consists of Korean instruction and question. +* `hrm8k_en`: English version of `hrm8k`. This benchmark consists of English instruction and question. + +#### Tasks + +* `hrm8k_{gsm8k|ksm|math|mmmlu|omni_math}` +* `hrm8k_en_{gsm8k|ksm|math|mmmlu|omni_math}` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/hrm8k/default/_hrm8k_yaml b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml new file mode 100644 index 00000000..18c53d22 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/_hrm8k_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k.yaml new file mode 100644 index 00000000..cc9753f6 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k.yaml @@ -0,0 +1,13 @@ +group: hrm8k +task: + - hrm8k_gsm8k + - hrm8k_ksm + - hrm8k_math + - hrm8k_mmmlu + - hrm8k_omni_math +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml new file mode 100644 index 00000000..a46ff5a0 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_gsm8k.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml new file mode 100644 index 00000000..3c1f7ac2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_ksm.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: KSM +task: hrm8k_ksm diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml new file mode 100644 index 00000000..ecdf67cf --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: MATH +task: 
hrm8k_math diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml new file mode 100644 index 00000000..20faaaf1 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_mmmlu.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml new file mode 100644 index 00000000..c2dadac2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/hrm8k_omni_math.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_yaml +dataset_name: OMNI_MATH +task: hrm8k_omni_math diff --git a/lm_eval/tasks/hrm8k/default/utils.py b/lm_eval/tasks/hrm8k/default/utils.py new file mode 100644 index 00000000..aaeecd14 --- /dev/null +++ b/lm_eval/tasks/hrm8k/default/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 최종 답변을 다음과 같은 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "주어진 문제를 풀어보세요.\n" + "문제를 푼 후, 주어진 선택지 (1, 2, 3, 4) 중 최종 선택지를 다음 형식으로 작성하세요: $\\boxed{N}$.\n\n" + f"문제: {doc['question'].strip()}\n답변:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) + + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is 
None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" + answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match = matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from 
https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string 
= string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). 
Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string diff --git a/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml new file mode 100644 index 00000000..18c53d22 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/_hrm8k_en_yaml @@ -0,0 +1,22 @@ +dataset_path: HAERAE-HUB/HRM8K +output_type: generate_until +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +process_results: !function utils.process_results +num_fewshot: 0 +generation_kwargs: + until: + - "" + - "<|end_of_text|>" + - "<|endoftext|>" + - "<|im_end|>" + max_gen_toks: 512 + do_sample: false + temperature: 0 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml new file mode 100644 index 00000000..17eac64a --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_en.yaml @@ -0,0 +1,13 @@ +group: hrm8k_en +task: + - hrm8k_gsm8k_en + - hrm8k_ksm_en + - hrm8k_math_en + - hrm8k_mmmlu_en + - hrm8k_omni_math_en +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml new file mode 100644 index 00000000..c2697a0b --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_gsm8k_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: GSM8K +task: hrm8k_gsm8k_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml new file mode 100644 index 00000000..a5e34d45 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_ksm_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml 
+dataset_name: KSM +task: hrm8k_ksm_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml new file mode 100644 index 00000000..ffbdce81 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: MATH +task: hrm8k_math_en diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml new file mode 100644 index 00000000..812f62e2 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_mmmlu_en.yaml @@ -0,0 +1,4 @@ +include: _hrm8k_en_yaml +dataset_name: MMMLU +task: hrm8k_mmmlu_en +doc_to_text: !function utils.doc_to_text_mmmlu diff --git a/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml new file mode 100644 index 00000000..f859de3d --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/hrm8k_omni_math_en.yaml @@ -0,0 +1,3 @@ +include: _hrm8k_en_yaml +dataset_name: OMNI_MATH +task: hrm8k_omni_math_en diff --git a/lm_eval/tasks/hrm8k/en/utils.py b/lm_eval/tasks/hrm8k/en/utils.py new file mode 100644 index 00000000..b67d8e91 --- /dev/null +++ b/lm_eval/tasks/hrm8k/en/utils.py @@ -0,0 +1,285 @@ +import re +from typing import Dict, List + + +def doc_to_text(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final answer in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_text_mmmlu(doc): + text = ( + "Solve the given question.\n" + "After solving the problem, state your final choice among the choices (1, 2, 3, 4) in the following format: $\\boxed{N}$.\n\n" + f"Question: {doc['original'].strip()}\nAnswer:" + ) + return text + + +def doc_to_target(doc): + return postprocess(doc["answer"]) + + +def postprocess(s): + s = str(s).strip() + try: + float_value = float(s) + return str(int(float_value)) if float_value.is_integer() else str(float_value) + except Exception: + return s + + +def 
process_results(doc: dict, results: List[str]) -> Dict[str, int]: + candidate = results[0] + + gold = postprocess(doc["answer"]) + + if not gold: + print(doc, candidate, gold) + if is_equiv(candidate, gold): + retval = 1 + else: + retval = 0 + + results = { + "exact_match": retval, + } + return results + + +def is_equiv(str1, str2, verbose=False): + if str1 is None and str2 is None: + print("WARNING: Both None") + return True + if str1 is None or str2 is None: + return False + + str1, str2 = parse_math_answer(str1), parse_math_answer(str2) + + try: + ss1 = _strip_string(str1) + ss1 = postprocess(ss1) + ss2 = _strip_string(str2) + if verbose: + print(ss1, ss2) + return ss1 == ss2 + except Exception: + return str1 == str2 + + +def parse_math_answer(raw_string): + def remove_boxed(s): + left = "\\boxed{" + try: + assert s[: len(left)] == left + assert s[-1] == "}" + answer = s[len(left) : -1] + if "=" in answer: + answer = answer.split("=")[-1].lstrip(" ") + return answer + except Exception: + return None + + def last_boxed_only_string(string): + idx = string.rfind("\\boxed") + if idx < 0: + idx = string.rfind("\\fbox") + if idx < 0: + return None + i = idx + right_brace_idx = None + num_left_braces_open = 0 + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + if right_brace_idx is None: + retval = None + else: + retval = string[idx : right_brace_idx + 1] + + return retval + + def get_answer_with_dollar_sign(s): + first_pattern = "\$(.*)\$" + last_match = None + matches = re.findall(first_pattern, s) + if matches: + last_match = matches[-1] + if "=" in last_match: + last_match = last_match.split("=")[-1].lstrip(" ") + return last_match + + def get_answer_without_dollar_sign(s): + last_match = None + if "=" in s: + last_match = s.split("=")[-1].lstrip(" ").rstrip(".") + if "\\n" in last_match: + last_match = 
last_match.split("\\n")[0] + else: + pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])" + matches = re.findall(pattern, s) + if matches: + last_match = matches[-1] + return last_match + + if "\\boxed" in raw_string: + answer = remove_boxed(last_boxed_only_string(raw_string)) + else: + answer = get_answer_with_dollar_sign(raw_string) + if not answer: + answer = get_answer_without_dollar_sign(raw_string) + return answer + + +# code from https://github.com/hendrycks/math/blob/main/modeling/math_equivalence.py +def _fix_fracs(string): + substrs = string.split("\\frac") + new_str = substrs[0] + if len(substrs) > 1: + substrs = substrs[1:] + for substr in substrs: + new_str += "\\frac" + if substr[0] == "{": + new_str += substr + else: + try: + assert len(substr) >= 2 + except Exception: + return string + a = substr[0] + b = substr[1] + if b != "{": + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}{" + b + "}" + post_substr + else: + new_str += "{" + a + "}{" + b + "}" + else: + if len(substr) > 2: + post_substr = substr[2:] + new_str += "{" + a + "}" + b + post_substr + else: + new_str += "{" + a + "}" + b + string = new_str + return string + + +def _fix_a_slash_b(string): + if len(string.split("/")) != 2: + return string + a = string.split("/")[0] + b = string.split("/")[1] + try: + a = int(a) + b = int(b) + assert string == "{}/{}".format(a, b) + new_string = "\\frac{" + str(a) + "}{" + str(b) + "}" + return new_string + except Exception: + return string + + +def _remove_right_units(string): + # "\\text{ " only ever occurs (at least in the val set) when describing units + if "\\text{ " in string: + splits = string.split("\\text{ ") + assert len(splits) == 2 + return splits[0] + else: + return string + + +def _fix_sqrt(string): + if "\\sqrt" not in string: + return string + splits = string.split("\\sqrt") + new_string = splits[0] + for split in splits[1:]: + if split[0] != "{": + a = split[0] + new_substr = "\\sqrt{" + a + "}" + split[1:] + else: + 
new_substr = "\\sqrt" + split + new_string += new_substr + return new_string + + +def _strip_string(string): + # linebreaks + string = string.replace("\n", "") + # print(string) + + # remove inverse spaces + string = string.replace("\\!", "") + # print(string) + + # replace \\ with \ + string = string.replace("\\\\", "\\") + # print(string) + + # replace tfrac and dfrac with frac + string = string.replace("tfrac", "frac") + string = string.replace("dfrac", "frac") + # print(string) + + # remove \left and \right + string = string.replace("\\left", "") + string = string.replace("\\right", "") + # print(string) + + # Remove circ (degrees) + string = string.replace("^{\\circ}", "") + string = string.replace("^\\circ", "") + + # remove dollar signs + string = string.replace("\\$", "") + + # remove units (on the right) + string = _remove_right_units(string) + + # remove percentage + string = string.replace("\\%", "") + string = string.replace("\%", "") + + # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string + string = string.replace(" .", " 0.") + string = string.replace("{.", "{0.") + # if empty, return empty string + if len(string) == 0: + return string + if string[0] == ".": + string = "0" + string + + # to consider: get rid of e.g. "k = " or "q = " at beginning + if len(string.split("=")) == 2: + if len(string.split("=")[0]) <= 2: + string = string.split("=")[1] + + # fix sqrt3 --> sqrt{3} + string = _fix_sqrt(string) + + # remove spaces + string = string.replace(" ", "") + + # \frac1b or \frac12 --> \frac{1}{b} and \frac{1}{2}, etc. Even works with \frac1{72} (but not \frac{72}1). 
Also does a/b --> \\frac{a}{b} + string = _fix_fracs(string) + + # manually change 0.5 --> \frac{1}{2} + if string == "0.5": + string = "\\frac{1}{2}" + + # NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y + string = _fix_a_slash_b(string) + + return string -- GitLab From 6dac8c694b0065e9b29ea2499bec516f9b759952 Mon Sep 17 00:00:00 2001 From: Boda Sadallah Date: Tue, 21 Jan 2025 00:46:18 +0400 Subject: [PATCH 02/32] New arabicmmlu (#2541) * point to the original ArabicMMLU dataset * create the new subtasks files * fix bug when the context filed is empty --- lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml | 2 +- .../arabicmmlu/_arabicmmlu_humanities.yaml | 2 +- .../arabicmmlu/_arabicmmlu_language.yaml | 2 +- .../tasks/arabicmmlu/_arabicmmlu_other.yaml | 2 +- .../_arabicmmlu_social_science.yaml | 2 +- .../tasks/arabicmmlu/_arabicmmlu_stem.yaml | 2 +- .../_default_arabicmmlu_template_yaml | 4 +- lm_eval/tasks/arabicmmlu/_generate_configs.py | 91 +++++++++---------- .../arabicmmlu_accounting_university.yaml | 5 + .../arabicmmlu_arabic_language_general.yaml | 4 +- .../arabicmmlu_arabic_language_grammar.yaml | 4 +- ...rabicmmlu_arabic_language_high_school.yaml | 5 + ...bicmmlu_arabic_language_middle_school.yaml | 5 + ...icmmlu_arabic_language_primary_school.yaml | 5 + .../arabicmmlu_biology_high_school.yaml | 5 + .../arabicmmlu_civics_high_school.yaml | 5 + .../arabicmmlu_civics_middle_school.yaml | 5 + ...abicmmlu_computer_science_high_school.yaml | 5 + ...icmmlu_computer_science_middle_school.yaml | 5 + ...cmmlu_computer_science_primary_school.yaml | 5 + ...rabicmmlu_computer_science_university.yaml | 5 + .../arabicmmlu/arabicmmlu_driving_test.yaml | 2 +- .../arabicmmlu_economics_high_school.yaml | 5 + .../arabicmmlu_economics_middle_school.yaml | 5 + .../arabicmmlu_economics_university.yaml | 5 + .../arabicmmlu_general_knowledge.yaml | 2 +- ...cmmlu_general_knowledge_middle_school.yaml | 5 + 
...mmlu_general_knowledge_primary_school.yaml | 5 + .../arabicmmlu_geography_high_school.yaml | 5 + .../arabicmmlu_geography_middle_school.yaml | 5 + .../arabicmmlu_geography_primary_school.yaml | 5 + .../arabicmmlu_high_arabic_language.yaml | 5 - .../arabicmmlu/arabicmmlu_high_biology.yaml | 5 - .../arabicmmlu/arabicmmlu_high_civics.yaml | 5 - .../arabicmmlu_high_computer_science.yaml | 5 - .../arabicmmlu/arabicmmlu_high_economics.yaml | 5 - .../arabicmmlu/arabicmmlu_high_geography.yaml | 5 - .../arabicmmlu/arabicmmlu_high_history.yaml | 5 - .../arabicmmlu_high_islamic_studies.yaml | 5 - .../arabicmmlu_high_philosophy.yaml | 5 - .../arabicmmlu/arabicmmlu_high_physics.yaml | 5 - .../arabicmmlu_history_high_school.yaml | 5 + .../arabicmmlu_history_middle_school.yaml | 5 + .../arabicmmlu_history_primary_school.yaml | 5 + .../arabicmmlu_islamic_studies.yaml | 2 +- ...rabicmmlu_islamic_studies_high_school.yaml | 5 + ...bicmmlu_islamic_studies_middle_school.yaml | 5 + ...icmmlu_islamic_studies_primary_school.yaml | 5 + .../arabicmmlu_law_professional.yaml | 5 + .../arabicmmlu_management_university.yaml | 5 + .../arabicmmlu_math_primary_school.yaml | 5 + .../arabicmmlu_middle_arabic_language.yaml | 5 - .../arabicmmlu/arabicmmlu_middle_civics.yaml | 5 - .../arabicmmlu_middle_computer_science.yaml | 5 - .../arabicmmlu_middle_economics.yaml | 5 - .../arabicmmlu_middle_general_knowledge.yaml | 5 - .../arabicmmlu_middle_geography.yaml | 5 - .../arabicmmlu/arabicmmlu_middle_history.yaml | 5 - .../arabicmmlu_middle_islamic_studies.yaml | 5 - .../arabicmmlu_middle_natural_science.yaml | 5 - .../arabicmmlu_middle_social_science.yaml | 5 - ...bicmmlu_natural_science_middle_school.yaml | 5 + ...icmmlu_natural_science_primary_school.yaml | 5 + .../arabicmmlu_philosophy_high_school.yaml | 5 + .../arabicmmlu_physics_high_school.yaml | 5 + ...abicmmlu_political_science_university.yaml | 5 + .../arabicmmlu_primary_arabic_language.yaml | 5 - .../arabicmmlu_primary_computer_science.yaml | 
5 - .../arabicmmlu_primary_general_knowledge.yaml | 5 - .../arabicmmlu_primary_geography.yaml | 5 - .../arabicmmlu_primary_history.yaml | 5 - .../arabicmmlu_primary_islamic_studies.yaml | 5 - .../arabicmmlu/arabicmmlu_primary_math.yaml | 5 - .../arabicmmlu_primary_natural_science.yaml | 5 - .../arabicmmlu_primary_social_science.yaml | 5 - .../tasks/arabicmmlu/arabicmmlu_prof_law.yaml | 5 - ...abicmmlu_social_science_middle_school.yaml | 5 + ...bicmmlu_social_science_primary_school.yaml | 5 + .../arabicmmlu_univ_accounting.yaml | 5 - .../arabicmmlu_univ_computer_science.yaml | 5 - .../arabicmmlu/arabicmmlu_univ_economics.yaml | 5 - .../arabicmmlu_univ_management.yaml | 5 - .../arabicmmlu_univ_political_science.yaml | 5 - lm_eval/tasks/arabicmmlu/utils.py | 2 +- 84 files changed, 236 insertions(+), 237 deletions(-) create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml create mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml create mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml delete mode 100644 
lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml create mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml delete mode 100644 lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml index 58cf795a..08ed9bb0 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu.yaml @@ -9,4 +9,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml index 6f61004a..b52bc804 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_humanities.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml index 90e57ae0..d9f62abc 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_language.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml index 3e989b8c..d96dc0bd 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml +++ 
b/lm_eval/tasks/arabicmmlu/_arabicmmlu_other.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml index 1ece047b..b40e7c80 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_social_science.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml index a464a62a..5065d0bd 100644 --- a/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml +++ b/lm_eval/tasks/arabicmmlu/_arabicmmlu_stem.yaml @@ -6,4 +6,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 0 + version: 1 diff --git a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml index eac23577..471c0fc0 100644 --- a/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml +++ b/lm_eval/tasks/arabicmmlu/_default_arabicmmlu_template_yaml @@ -1,4 +1,4 @@ -dataset_path: yazeed7/ArabicMMLU +dataset_path: MBZUAI/ArabicMMLU test_split: test fewshot_split: dev fewshot_config: @@ -12,4 +12,4 @@ metric_list: aggregation: mean higher_is_better: true metadata: - version: 0.0 + version: 1.0 diff --git a/lm_eval/tasks/arabicmmlu/_generate_configs.py b/lm_eval/tasks/arabicmmlu/_generate_configs.py index 4d091e12..ea59fe98 100644 --- a/lm_eval/tasks/arabicmmlu/_generate_configs.py +++ b/lm_eval/tasks/arabicmmlu/_generate_configs.py @@ -13,48 +13,46 @@ from tqdm import tqdm eval_logger = logging.getLogger("lm-eval") -SUBJECTS = { - "Driving Test": "other", - "High Geography": "social_science", - "High History": "humanities", - "Islamic Studies": "humanities", - "Univ Accounting": "social_science", - "Primary General Knowledge": 
"other", - "Univ Political Science": "social_science", - "Primary Math": "stem", - "Middle General Knowledge": "other", - "High Biology": "stem", - "Primary Natural Science": "stem", - "High Economics": "social_science", - "Middle Natural Science": "stem", - "Middle Geography": "social_science", - "Primary Social Science": "social_science", - "Middle Computer Science": "stem", - "Middle Islamic Studies": "humanities", - "Primary Computer Science": "stem", - "High Physics": "stem", - "Middle Social Science": "social_science", - "Middle Civics": "social_science", - "High Computer Science": "stem", - "General Knowledge": "other", - "High Civics": "social_science", - "Prof Law": "humanities", - "High Islamic Studies": "humanities", - "Primary Arabic Language": "language", - "High Arabic Language": "language", - "Arabic Language (Grammar)": "language", - "Primary History": "humanities", - "Middle History": "humanities", - "Univ Economics": "social_science", - "Arabic Language (General)": "language", - "Univ Computer Science": "stem", - "Primary Islamic Studies": "humanities", - "Primary Geography": "social_science", - "High Philosophy": "humanities", - "Middle Arabic Language": "language", - "Middle Economics": "social_science", - "Univ Management": "other", -} +SUBJECTS = {'Islamic Studies': 'humanities', + 'Driving Test': 'other', + 'Natural Science (Middle School)': 'stem', + 'Natural Science (Primary School)': 'stem', + 'History (Primary School)': 'humanities', + 'History (Middle School)': 'humanities', + 'History (High School)': 'humanities', + 'General Knowledge': 'other', + 'General Knowledge (Primary School)': 'other', + 'General Knowledge (Middle School)': 'other', + 'Law (Professional)': 'humanities', + 'Physics (High School)': 'stem', + 'Social Science (Middle School)': 'social_science', + 'Social Science (Primary School)': 'social_science', + 'Management (University)': 'other', + 'Arabic Language (Primary School)': 'language', + 'Arabic Language (Middle 
School)': 'language', + 'Arabic Language (High School)': 'language', + 'Political Science (University)': 'social_science', + 'Philosophy (High School)': 'humanities', + 'Accounting (University)': 'social_science', + 'Computer Science (University)': 'stem', + 'Computer Science (Middle School)': 'stem', + 'Computer Science (Primary School)': 'stem', + 'Computer Science (High School)': 'stem', + 'Geography (Primary School)': 'social_science', + 'Geography (Middle School)': 'social_science', + 'Geography (High School)': 'social_science', + 'Math (Primary School)': 'stem', + 'Biology (High School)': 'stem', + 'Economics (University)': 'social_science', + 'Economics (Middle School)': 'social_science', + 'Economics (High School)': 'social_science', + 'Arabic Language (General)': 'language', + 'Arabic Language (Grammar)': 'language', + 'Islamic Studies (High School)': 'humanities', + 'Islamic Studies (Middle School)': 'humanities', + 'Islamic Studies (Primary School)': 'humanities', + 'Civics (Middle School)': 'social_science', + 'Civics (High School)': 'social_science'} def parse_args(): @@ -69,8 +67,9 @@ if __name__ == "__main__": # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs. 
base_yaml_name = os.path.split(args.base_yaml_path)[-1] - with open(args.base_yaml_path, encoding="utf-8") as f: - base_yaml = yaml.full_load(f) + + # with open(args.base_yaml_path, encoding="utf-8") as f: + # base_yaml = yaml.full_load(f) ALL_CATEGORIES = [] for subject, category in tqdm(SUBJECTS.items()): @@ -81,8 +80,8 @@ if __name__ == "__main__": yaml_dict = { "include": base_yaml_name, - "tag": f"arabicmmlu_{category}", - "task": f"arabicmmlu_{subject.lower().replace(' ', '_')}", + "tag": f"arabicmmlu_{category}_tasks", + "task": f"arabicmmlu_{subject.lower().replace(' ', '_').replace('(', '').replace(')', '')}", "task_alias": subject, "dataset_name": subject, # "description": description, diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml new file mode 100644 index 00000000..7ec8caad --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_accounting_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Accounting (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_accounting_university" +"task_alias": "Accounting (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml index f57dc08c..621312d9 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_general.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (General)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(general)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_general" "task_alias": "Arabic Language (General)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml index 
baf32676..0511b9d9 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_grammar.yaml @@ -1,5 +1,5 @@ "dataset_name": "Arabic Language (Grammar)" -"tag": "arabicmmlu_language_tasks" "include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_arabic_language_(grammar)" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_grammar" "task_alias": "Arabic Language (Grammar)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml new file mode 100644 index 00000000..77dc002b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_high_school" +"task_alias": "Arabic Language (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml new file mode 100644 index 00000000..9b9b2007 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_middle_school" +"task_alias": "Arabic Language (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml new file mode 100644 index 00000000..3c0f045d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_arabic_language_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Arabic Language (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": 
"arabicmmlu_language_tasks" +"task": "arabicmmlu_arabic_language_primary_school" +"task_alias": "Arabic Language (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml new file mode 100644 index 00000000..865a477d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_biology_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Biology (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_biology_high_school" +"task_alias": "Biology (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml new file mode 100644 index 00000000..6f81e922 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_high_school" +"task_alias": "Civics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml new file mode 100644 index 00000000..3e82c777 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_civics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Civics (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_civics_middle_school" +"task_alias": "Civics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml new file mode 100644 index 00000000..59aa929d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (High School)" +"include": 
"_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_high_school" +"task_alias": "Computer Science (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml new file mode 100644 index 00000000..3ecdc106 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_middle_school" +"task_alias": "Computer Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml new file mode 100644 index 00000000..8feec4aa --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_primary_school" +"task_alias": "Computer Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml new file mode 100644 index 00000000..327cfab6 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_computer_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Computer Science (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_computer_science_university" +"task_alias": "Computer Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml index d40c9eb9..ab951dfc 100644 --- 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_driving_test.yaml @@ -1,5 +1,5 @@ "dataset_name": "Driving Test" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_driving_test" "task_alias": "Driving Test" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml new file mode 100644 index 00000000..78cba021 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_high_school" +"task_alias": "Economics (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml new file mode 100644 index 00000000..ed004b34 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_middle_school" +"task_alias": "Economics (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml new file mode 100644 index 00000000..76bfe4f1 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_economics_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Economics (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_economics_university" +"task_alias": "Economics (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml index 
fbd8839d..8ac6e710 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge.yaml @@ -1,5 +1,5 @@ "dataset_name": "General Knowledge" -"tag": "arabicmmlu_other_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" "task": "arabicmmlu_general_knowledge" "task_alias": "General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml new file mode 100644 index 00000000..a6e4b7c9 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_middle_school" +"task_alias": "General Knowledge (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml new file mode 100644 index 00000000..07358299 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_general_knowledge_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "General Knowledge (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_general_knowledge_primary_school" +"task_alias": "General Knowledge (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml new file mode 100644 index 00000000..b6264fc4 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_high_school" +"task_alias": "Geography 
(High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml new file mode 100644 index 00000000..6483749f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_middle_school" +"task_alias": "Geography (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml new file mode 100644 index 00000000..1465fb05 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_geography_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Geography (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_geography_primary_school" +"task_alias": "Geography (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml deleted file mode 100644 index 17d17bc8..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_arabic_language" -"task_alias": "High Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml deleted file mode 100644 index 2b5baf0b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_biology.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Biology" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_biology" -"task_alias": "High Biology" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml deleted file mode 100644 index 87050922..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Civics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_civics" -"task_alias": "High Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml deleted file mode 100644 index f1a66a5c..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_computer_science" -"task_alias": "High Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml deleted file mode 100644 index a1d6e90f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_economics" -"task_alias": "High Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml deleted file mode 100644 index ad980432..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_geography" -"task_alias": "High Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml deleted file mode 100644 
index 49c82669..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_history" -"task_alias": "High History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml deleted file mode 100644 index 15b5358b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_islamic_studies" -"task_alias": "High Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml deleted file mode 100644 index e0b20e30..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_philosophy.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Philosophy" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_philosophy" -"task_alias": "High Philosophy" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml deleted file mode 100644 index a7fe5ecc..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_high_physics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "High Physics" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_high_physics" -"task_alias": "High Physics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml new file mode 100644 index 00000000..b97a081a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": 
"History (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_high_school" +"task_alias": "History (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml new file mode 100644 index 00000000..3435604a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_middle_school" +"task_alias": "History (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml new file mode 100644 index 00000000..c156ff52 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_history_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "History (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_history_primary_school" +"task_alias": "History (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml index bacd5ace..4d5020a5 100644 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies.yaml @@ -1,5 +1,5 @@ "dataset_name": "Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" "include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" "task": "arabicmmlu_islamic_studies" "task_alias": "Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml new file mode 100644 index 00000000..5bae042f --- /dev/null +++ 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_high_school" +"task_alias": "Islamic Studies (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml new file mode 100644 index 00000000..af192fc1 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_middle_school" +"task_alias": "Islamic Studies (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml new file mode 100644 index 00000000..c4e5d354 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_islamic_studies_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Islamic Studies (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_islamic_studies_primary_school" +"task_alias": "Islamic Studies (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml new file mode 100644 index 00000000..5e2b6a4a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_law_professional.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Law (Professional)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_law_professional" +"task_alias": "Law (Professional)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml new file mode 100644 index 00000000..386c8e6b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_management_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Management (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_other_tasks" +"task": "arabicmmlu_management_university" +"task_alias": "Management (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml new file mode 100644 index 00000000..1df99b8a --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_math_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Math (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_math_primary_school" +"task_alias": "Math (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml deleted file mode 100644 index 14a2ab1a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_arabic_language" -"task_alias": "Middle Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml deleted file mode 100644 index 44ba95d4..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_civics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Civics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_civics" -"task_alias": "Middle Civics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml deleted file mode 100644 index 8dd4136f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_computer_science" -"task_alias": "Middle Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml deleted file mode 100644 index 312fa2e3..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_economics" -"task_alias": "Middle Economics" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml deleted file mode 100644 index c359d85a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_general_knowledge" -"task_alias": "Middle General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml deleted file mode 100644 index 111b13cf..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_geography" -"task_alias": "Middle Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml deleted file mode 100644 index 615a2e51..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_history" -"task_alias": "Middle History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml deleted file mode 100644 index 44922360..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_islamic_studies" -"task_alias": "Middle Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml deleted file mode 100644 index 265cdbaa..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_natural_science" -"task_alias": "Middle Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml deleted file mode 100644 index 84c247dd..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_middle_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Middle Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_middle_social_science" -"task_alias": "Middle Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml 
b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml new file mode 100644 index 00000000..3b61531d --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_middle_school" +"task_alias": "Natural Science (Middle School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml new file mode 100644 index 00000000..1efd6c9b --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_natural_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Natural Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_natural_science_primary_school" +"task_alias": "Natural Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml new file mode 100644 index 00000000..66715bb0 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_philosophy_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Philosophy (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_humanities_tasks" +"task": "arabicmmlu_philosophy_high_school" +"task_alias": "Philosophy (High School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml new file mode 100644 index 00000000..00ecf8ad --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_physics_high_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Physics (High School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_stem_tasks" +"task": "arabicmmlu_physics_high_school" +"task_alias": "Physics (High School)" diff 
--git a/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml new file mode 100644 index 00000000..1f64125f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_political_science_university.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Political Science (University)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_political_science_university" +"task_alias": "Political Science (University)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml deleted file mode 100644 index 700bc078..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_arabic_language.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Arabic Language" -"tag": "arabicmmlu_language_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_arabic_language" -"task_alias": "Primary Arabic Language" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml deleted file mode 100644 index b89089cd..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_computer_science" -"task_alias": "Primary Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml deleted file mode 100644 index 85dd0b7f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_general_knowledge.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary General Knowledge" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": 
"arabicmmlu_primary_general_knowledge" -"task_alias": "Primary General Knowledge" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml deleted file mode 100644 index f7efc487..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_geography.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Geography" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_geography" -"task_alias": "Primary Geography" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml deleted file mode 100644 index f7d69ca9..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_history.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary History" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_history" -"task_alias": "Primary History" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml deleted file mode 100644 index b36cd640..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_islamic_studies.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Islamic Studies" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_islamic_studies" -"task_alias": "Primary Islamic Studies" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml deleted file mode 100644 index 0e53adcf..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_math.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Math" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_math" -"task_alias": "Primary Math" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml deleted file mode 100644 index 4e208c76..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_natural_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Natural Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_natural_science" -"task_alias": "Primary Natural Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml deleted file mode 100644 index fee4fe5d..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_primary_social_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Primary Social Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_primary_social_science" -"task_alias": "Primary Social Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml deleted file mode 100644 index 20bf6c5f..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_prof_law.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Prof Law" -"tag": "arabicmmlu_humanities_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_prof_law" -"task_alias": "Prof Law" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml new file mode 100644 index 00000000..b876649f --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_middle_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Middle School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_middle_school" +"task_alias": "Social Science (Middle School)" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml new file mode 100644 index 00000000..6f688480 --- /dev/null +++ b/lm_eval/tasks/arabicmmlu/arabicmmlu_social_science_primary_school.yaml @@ -0,0 +1,5 @@ +"dataset_name": "Social Science (Primary School)" +"include": "_default_arabicmmlu_template_yaml" +"tag": "arabicmmlu_social_science_tasks" +"task": "arabicmmlu_social_science_primary_school" +"task_alias": "Social Science (Primary School)" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml deleted file mode 100644 index 6d1d9412..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_accounting.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Accounting" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_accounting" -"task_alias": "Univ Accounting" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml deleted file mode 100644 index 42e7e89a..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_computer_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Computer Science" -"tag": "arabicmmlu_stem_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_computer_science" -"task_alias": "Univ Computer Science" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml deleted file mode 100644 index 21015ffa..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_economics.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Economics" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_economics" -"task_alias": "Univ Economics" diff --git 
a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml deleted file mode 100644 index e69ad74b..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_management.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Management" -"tag": "arabicmmlu_other_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_management" -"task_alias": "Univ Management" diff --git a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml b/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml deleted file mode 100644 index bb85a104..00000000 --- a/lm_eval/tasks/arabicmmlu/arabicmmlu_univ_political_science.yaml +++ /dev/null @@ -1,5 +0,0 @@ -"dataset_name": "Univ Political Science" -"tag": "arabicmmlu_social_science_tasks" -"include": "_default_arabicmmlu_template_yaml" -"task": "arabicmmlu_univ_political_science" -"task_alias": "Univ Political Science" diff --git a/lm_eval/tasks/arabicmmlu/utils.py b/lm_eval/tasks/arabicmmlu/utils.py index 2c476131..a572489e 100644 --- a/lm_eval/tasks/arabicmmlu/utils.py +++ b/lm_eval/tasks/arabicmmlu/utils.py @@ -23,7 +23,7 @@ def doc_to_text(doc): question = ( doc["Question"] - if doc["Context"] == "" + if not doc["Context"] else f"{doc['Context']}\n\n{doc['Question']}" ) -- GitLab From 3a4e46741749a8c6d7f702e015285653bc1acdb0 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Tue, 21 Jan 2025 06:04:00 +0900 Subject: [PATCH 03/32] apply precommit (#2636) --- lm_eval/tasks/global_mmlu/README.md | 19 ++++- .../global_mmlu/{ => default}/_default_yaml | 0 .../{ => default}/_generate_configs.py | 0 .../{ => default}/global_mmlu_ar.yaml | 0 .../{ => default}/global_mmlu_bn.yaml | 0 .../{ => default}/global_mmlu_de.yaml | 0 .../{ => default}/global_mmlu_en.yaml | 0 .../{ => default}/global_mmlu_es.yaml | 0 .../{ => default}/global_mmlu_fr.yaml | 0 .../{ => default}/global_mmlu_hi.yaml | 0 .../{ => default}/global_mmlu_id.yaml | 0 .../{ => 
default}/global_mmlu_it.yaml | 0 .../{ => default}/global_mmlu_ja.yaml | 0 .../{ => default}/global_mmlu_ko.yaml | 0 .../{ => default}/global_mmlu_pt.yaml | 0 .../{ => default}/global_mmlu_sw.yaml | 0 .../{ => default}/global_mmlu_yo.yaml | 0 .../{ => default}/global_mmlu_zh.yaml | 0 .../global_mmlu/full/am/_am_template_yaml | 16 ++++ .../full/am/_global_mmlu_full_am.yaml | 11 +++ .../am/_global_mmlu_full_am_humanities.yaml | 8 ++ .../full/am/_global_mmlu_full_am_other.yaml | 8 ++ .../_global_mmlu_full_am_social_sciences.yaml | 8 ++ .../full/am/_global_mmlu_full_am_stem.yaml | 8 ++ .../global_mmlu_full_am_abstract_algebra.yaml | 5 ++ .../full/am/global_mmlu_full_am_anatomy.yaml | 5 ++ .../am/global_mmlu_full_am_astronomy.yaml | 5 ++ .../global_mmlu_full_am_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_am_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_am_college_biology.yaml | 5 ++ ...global_mmlu_full_am_college_chemistry.yaml | 5 ++ ...mmlu_full_am_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_am_college_mathematics.yaml | 5 ++ .../global_mmlu_full_am_college_medicine.yaml | 5 ++ .../global_mmlu_full_am_college_physics.yaml | 5 ++ ...global_mmlu_full_am_computer_security.yaml | 5 ++ ...lobal_mmlu_full_am_conceptual_physics.yaml | 5 ++ .../am/global_mmlu_full_am_econometrics.yaml | 5 ++ ...l_mmlu_full_am_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_am_elementary_mathematics.yaml | 5 ++ .../am/global_mmlu_full_am_formal_logic.yaml | 5 ++ .../am/global_mmlu_full_am_global_facts.yaml | 5 ++ ...obal_mmlu_full_am_high_school_biology.yaml | 5 ++ ...al_mmlu_full_am_high_school_chemistry.yaml | 5 ++ ..._full_am_high_school_computer_science.yaml | 5 ++ ..._full_am_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_am_high_school_geography.yaml | 5 ++ ...m_high_school_government_and_politics.yaml | 5 ++ ...lu_full_am_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_am_high_school_mathematics.yaml | 5 ++ 
...lu_full_am_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_am_high_school_physics.yaml | 5 ++ ...l_mmlu_full_am_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_am_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_am_high_school_us_history.yaml | 5 ++ ...mlu_full_am_high_school_world_history.yaml | 5 ++ .../am/global_mmlu_full_am_human_aging.yaml | 5 ++ .../global_mmlu_full_am_human_sexuality.yaml | 5 ++ ...global_mmlu_full_am_international_law.yaml | 5 ++ .../am/global_mmlu_full_am_jurisprudence.yaml | 5 ++ ...global_mmlu_full_am_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_am_machine_learning.yaml | 5 ++ .../am/global_mmlu_full_am_management.yaml | 5 ++ .../am/global_mmlu_full_am_marketing.yaml | 5 ++ .../global_mmlu_full_am_medical_genetics.yaml | 5 ++ .../am/global_mmlu_full_am_miscellaneous.yaml | 5 ++ .../global_mmlu_full_am_moral_disputes.yaml | 5 ++ .../global_mmlu_full_am_moral_scenarios.yaml | 5 ++ .../am/global_mmlu_full_am_nutrition.yaml | 5 ++ .../am/global_mmlu_full_am_philosophy.yaml | 5 ++ .../am/global_mmlu_full_am_prehistory.yaml | 5 ++ ..._mmlu_full_am_professional_accounting.yaml | 5 ++ .../global_mmlu_full_am_professional_law.yaml | 5 ++ ...al_mmlu_full_am_professional_medicine.yaml | 5 ++ ..._mmlu_full_am_professional_psychology.yaml | 5 ++ .../global_mmlu_full_am_public_relations.yaml | 5 ++ .../global_mmlu_full_am_security_studies.yaml | 5 ++ .../am/global_mmlu_full_am_sociology.yaml | 5 ++ ...global_mmlu_full_am_us_foreign_policy.yaml | 5 ++ .../full/am/global_mmlu_full_am_virology.yaml | 5 ++ .../global_mmlu_full_am_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/am/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/ar/_ar_template_yaml | 16 ++++ .../full/ar/_global_mmlu_full_ar.yaml | 11 +++ .../ar/_global_mmlu_full_ar_humanities.yaml | 8 ++ .../full/ar/_global_mmlu_full_ar_other.yaml | 8 ++ .../_global_mmlu_full_ar_social_sciences.yaml | 8 ++ .../full/ar/_global_mmlu_full_ar_stem.yaml | 8 ++ 
.../global_mmlu_full_ar_abstract_algebra.yaml | 5 ++ .../full/ar/global_mmlu_full_ar_anatomy.yaml | 5 ++ .../ar/global_mmlu_full_ar_astronomy.yaml | 5 ++ .../global_mmlu_full_ar_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ar_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ar_college_biology.yaml | 5 ++ ...global_mmlu_full_ar_college_chemistry.yaml | 5 ++ ...mmlu_full_ar_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ar_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ar_college_medicine.yaml | 5 ++ .../global_mmlu_full_ar_college_physics.yaml | 5 ++ ...global_mmlu_full_ar_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ar_conceptual_physics.yaml | 5 ++ .../ar/global_mmlu_full_ar_econometrics.yaml | 5 ++ ...l_mmlu_full_ar_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ar_elementary_mathematics.yaml | 5 ++ .../ar/global_mmlu_full_ar_formal_logic.yaml | 5 ++ .../ar/global_mmlu_full_ar_global_facts.yaml | 5 ++ ...obal_mmlu_full_ar_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ar_high_school_chemistry.yaml | 5 ++ ..._full_ar_high_school_computer_science.yaml | 5 ++ ..._full_ar_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ar_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ar_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ar_high_school_mathematics.yaml | 5 ++ ...lu_full_ar_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ar_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ar_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ar_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ar_high_school_us_history.yaml | 5 ++ ...mlu_full_ar_high_school_world_history.yaml | 5 ++ .../ar/global_mmlu_full_ar_human_aging.yaml | 5 ++ .../global_mmlu_full_ar_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ar_international_law.yaml | 5 ++ .../ar/global_mmlu_full_ar_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ar_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ar_machine_learning.yaml | 5 ++ 
.../ar/global_mmlu_full_ar_management.yaml | 5 ++ .../ar/global_mmlu_full_ar_marketing.yaml | 5 ++ .../global_mmlu_full_ar_medical_genetics.yaml | 5 ++ .../ar/global_mmlu_full_ar_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ar_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ar_moral_scenarios.yaml | 5 ++ .../ar/global_mmlu_full_ar_nutrition.yaml | 5 ++ .../ar/global_mmlu_full_ar_philosophy.yaml | 5 ++ .../ar/global_mmlu_full_ar_prehistory.yaml | 5 ++ ..._mmlu_full_ar_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ar_professional_law.yaml | 5 ++ ...al_mmlu_full_ar_professional_medicine.yaml | 5 ++ ..._mmlu_full_ar_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ar_public_relations.yaml | 5 ++ .../global_mmlu_full_ar_security_studies.yaml | 5 ++ .../ar/global_mmlu_full_ar_sociology.yaml | 5 ++ ...global_mmlu_full_ar_us_foreign_policy.yaml | 5 ++ .../full/ar/global_mmlu_full_ar_virology.yaml | 5 ++ .../global_mmlu_full_ar_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ar/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/bn/_bn_template_yaml | 16 ++++ .../full/bn/_global_mmlu_full_bn.yaml | 11 +++ .../bn/_global_mmlu_full_bn_humanities.yaml | 8 ++ .../full/bn/_global_mmlu_full_bn_other.yaml | 8 ++ .../_global_mmlu_full_bn_social_sciences.yaml | 8 ++ .../full/bn/_global_mmlu_full_bn_stem.yaml | 8 ++ .../global_mmlu_full_bn_abstract_algebra.yaml | 5 ++ .../full/bn/global_mmlu_full_bn_anatomy.yaml | 5 ++ .../bn/global_mmlu_full_bn_astronomy.yaml | 5 ++ .../global_mmlu_full_bn_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_bn_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_bn_college_biology.yaml | 5 ++ ...global_mmlu_full_bn_college_chemistry.yaml | 5 ++ ...mmlu_full_bn_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_bn_college_mathematics.yaml | 5 ++ .../global_mmlu_full_bn_college_medicine.yaml | 5 ++ .../global_mmlu_full_bn_college_physics.yaml | 5 ++ ...global_mmlu_full_bn_computer_security.yaml | 5 ++ 
...lobal_mmlu_full_bn_conceptual_physics.yaml | 5 ++ .../bn/global_mmlu_full_bn_econometrics.yaml | 5 ++ ...l_mmlu_full_bn_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_bn_elementary_mathematics.yaml | 5 ++ .../bn/global_mmlu_full_bn_formal_logic.yaml | 5 ++ .../bn/global_mmlu_full_bn_global_facts.yaml | 5 ++ ...obal_mmlu_full_bn_high_school_biology.yaml | 5 ++ ...al_mmlu_full_bn_high_school_chemistry.yaml | 5 ++ ..._full_bn_high_school_computer_science.yaml | 5 ++ ..._full_bn_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_bn_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_bn_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_bn_high_school_mathematics.yaml | 5 ++ ...lu_full_bn_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_bn_high_school_physics.yaml | 5 ++ ...l_mmlu_full_bn_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_bn_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_bn_high_school_us_history.yaml | 5 ++ ...mlu_full_bn_high_school_world_history.yaml | 5 ++ .../bn/global_mmlu_full_bn_human_aging.yaml | 5 ++ .../global_mmlu_full_bn_human_sexuality.yaml | 5 ++ ...global_mmlu_full_bn_international_law.yaml | 5 ++ .../bn/global_mmlu_full_bn_jurisprudence.yaml | 5 ++ ...global_mmlu_full_bn_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_bn_machine_learning.yaml | 5 ++ .../bn/global_mmlu_full_bn_management.yaml | 5 ++ .../bn/global_mmlu_full_bn_marketing.yaml | 5 ++ .../global_mmlu_full_bn_medical_genetics.yaml | 5 ++ .../bn/global_mmlu_full_bn_miscellaneous.yaml | 5 ++ .../global_mmlu_full_bn_moral_disputes.yaml | 5 ++ .../global_mmlu_full_bn_moral_scenarios.yaml | 5 ++ .../bn/global_mmlu_full_bn_nutrition.yaml | 5 ++ .../bn/global_mmlu_full_bn_philosophy.yaml | 5 ++ .../bn/global_mmlu_full_bn_prehistory.yaml | 5 ++ ..._mmlu_full_bn_professional_accounting.yaml | 5 ++ .../global_mmlu_full_bn_professional_law.yaml | 5 ++ ...al_mmlu_full_bn_professional_medicine.yaml | 5 ++ 
..._mmlu_full_bn_professional_psychology.yaml | 5 ++ .../global_mmlu_full_bn_public_relations.yaml | 5 ++ .../global_mmlu_full_bn_security_studies.yaml | 5 ++ .../bn/global_mmlu_full_bn_sociology.yaml | 5 ++ ...global_mmlu_full_bn_us_foreign_policy.yaml | 5 ++ .../full/bn/global_mmlu_full_bn_virology.yaml | 5 ++ .../global_mmlu_full_bn_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/bn/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/cs/_cs_template_yaml | 16 ++++ .../full/cs/_global_mmlu_full_cs.yaml | 11 +++ .../cs/_global_mmlu_full_cs_humanities.yaml | 8 ++ .../full/cs/_global_mmlu_full_cs_other.yaml | 8 ++ .../_global_mmlu_full_cs_social_sciences.yaml | 8 ++ .../full/cs/_global_mmlu_full_cs_stem.yaml | 8 ++ .../global_mmlu_full_cs_abstract_algebra.yaml | 5 ++ .../full/cs/global_mmlu_full_cs_anatomy.yaml | 5 ++ .../cs/global_mmlu_full_cs_astronomy.yaml | 5 ++ .../global_mmlu_full_cs_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_cs_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_cs_college_biology.yaml | 5 ++ ...global_mmlu_full_cs_college_chemistry.yaml | 5 ++ ...mmlu_full_cs_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_cs_college_mathematics.yaml | 5 ++ .../global_mmlu_full_cs_college_medicine.yaml | 5 ++ .../global_mmlu_full_cs_college_physics.yaml | 5 ++ ...global_mmlu_full_cs_computer_security.yaml | 5 ++ ...lobal_mmlu_full_cs_conceptual_physics.yaml | 5 ++ .../cs/global_mmlu_full_cs_econometrics.yaml | 5 ++ ...l_mmlu_full_cs_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_cs_elementary_mathematics.yaml | 5 ++ .../cs/global_mmlu_full_cs_formal_logic.yaml | 5 ++ .../cs/global_mmlu_full_cs_global_facts.yaml | 5 ++ ...obal_mmlu_full_cs_high_school_biology.yaml | 5 ++ ...al_mmlu_full_cs_high_school_chemistry.yaml | 5 ++ ..._full_cs_high_school_computer_science.yaml | 5 ++ ..._full_cs_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_cs_high_school_geography.yaml | 5 ++ ...s_high_school_government_and_politics.yaml | 5 ++ 
...lu_full_cs_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_cs_high_school_mathematics.yaml | 5 ++ ...lu_full_cs_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_cs_high_school_physics.yaml | 5 ++ ...l_mmlu_full_cs_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_cs_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_cs_high_school_us_history.yaml | 5 ++ ...mlu_full_cs_high_school_world_history.yaml | 5 ++ .../cs/global_mmlu_full_cs_human_aging.yaml | 5 ++ .../global_mmlu_full_cs_human_sexuality.yaml | 5 ++ ...global_mmlu_full_cs_international_law.yaml | 5 ++ .../cs/global_mmlu_full_cs_jurisprudence.yaml | 5 ++ ...global_mmlu_full_cs_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_cs_machine_learning.yaml | 5 ++ .../cs/global_mmlu_full_cs_management.yaml | 5 ++ .../cs/global_mmlu_full_cs_marketing.yaml | 5 ++ .../global_mmlu_full_cs_medical_genetics.yaml | 5 ++ .../cs/global_mmlu_full_cs_miscellaneous.yaml | 5 ++ .../global_mmlu_full_cs_moral_disputes.yaml | 5 ++ .../global_mmlu_full_cs_moral_scenarios.yaml | 5 ++ .../cs/global_mmlu_full_cs_nutrition.yaml | 5 ++ .../cs/global_mmlu_full_cs_philosophy.yaml | 5 ++ .../cs/global_mmlu_full_cs_prehistory.yaml | 5 ++ ..._mmlu_full_cs_professional_accounting.yaml | 5 ++ .../global_mmlu_full_cs_professional_law.yaml | 5 ++ ...al_mmlu_full_cs_professional_medicine.yaml | 5 ++ ..._mmlu_full_cs_professional_psychology.yaml | 5 ++ .../global_mmlu_full_cs_public_relations.yaml | 5 ++ .../global_mmlu_full_cs_security_studies.yaml | 5 ++ .../cs/global_mmlu_full_cs_sociology.yaml | 5 ++ ...global_mmlu_full_cs_us_foreign_policy.yaml | 5 ++ .../full/cs/global_mmlu_full_cs_virology.yaml | 5 ++ .../global_mmlu_full_cs_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/cs/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/de/_de_template_yaml | 16 ++++ .../full/de/_global_mmlu_full_de.yaml | 11 +++ .../de/_global_mmlu_full_de_humanities.yaml | 8 ++ .../full/de/_global_mmlu_full_de_other.yaml | 8 ++ 
.../_global_mmlu_full_de_social_sciences.yaml | 8 ++ .../full/de/_global_mmlu_full_de_stem.yaml | 8 ++ .../global_mmlu_full_de_abstract_algebra.yaml | 5 ++ .../full/de/global_mmlu_full_de_anatomy.yaml | 5 ++ .../de/global_mmlu_full_de_astronomy.yaml | 5 ++ .../global_mmlu_full_de_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_de_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_de_college_biology.yaml | 5 ++ ...global_mmlu_full_de_college_chemistry.yaml | 5 ++ ...mmlu_full_de_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_de_college_mathematics.yaml | 5 ++ .../global_mmlu_full_de_college_medicine.yaml | 5 ++ .../global_mmlu_full_de_college_physics.yaml | 5 ++ ...global_mmlu_full_de_computer_security.yaml | 5 ++ ...lobal_mmlu_full_de_conceptual_physics.yaml | 5 ++ .../de/global_mmlu_full_de_econometrics.yaml | 5 ++ ...l_mmlu_full_de_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_de_elementary_mathematics.yaml | 5 ++ .../de/global_mmlu_full_de_formal_logic.yaml | 5 ++ .../de/global_mmlu_full_de_global_facts.yaml | 5 ++ ...obal_mmlu_full_de_high_school_biology.yaml | 5 ++ ...al_mmlu_full_de_high_school_chemistry.yaml | 5 ++ ..._full_de_high_school_computer_science.yaml | 5 ++ ..._full_de_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_de_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_de_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_de_high_school_mathematics.yaml | 5 ++ ...lu_full_de_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_de_high_school_physics.yaml | 5 ++ ...l_mmlu_full_de_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_de_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_de_high_school_us_history.yaml | 5 ++ ...mlu_full_de_high_school_world_history.yaml | 5 ++ .../de/global_mmlu_full_de_human_aging.yaml | 5 ++ .../global_mmlu_full_de_human_sexuality.yaml | 5 ++ ...global_mmlu_full_de_international_law.yaml | 5 ++ .../de/global_mmlu_full_de_jurisprudence.yaml | 5 ++ 
...global_mmlu_full_de_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_de_machine_learning.yaml | 5 ++ .../de/global_mmlu_full_de_management.yaml | 5 ++ .../de/global_mmlu_full_de_marketing.yaml | 5 ++ .../global_mmlu_full_de_medical_genetics.yaml | 5 ++ .../de/global_mmlu_full_de_miscellaneous.yaml | 5 ++ .../global_mmlu_full_de_moral_disputes.yaml | 5 ++ .../global_mmlu_full_de_moral_scenarios.yaml | 5 ++ .../de/global_mmlu_full_de_nutrition.yaml | 5 ++ .../de/global_mmlu_full_de_philosophy.yaml | 5 ++ .../de/global_mmlu_full_de_prehistory.yaml | 5 ++ ..._mmlu_full_de_professional_accounting.yaml | 5 ++ .../global_mmlu_full_de_professional_law.yaml | 5 ++ ...al_mmlu_full_de_professional_medicine.yaml | 5 ++ ..._mmlu_full_de_professional_psychology.yaml | 5 ++ .../global_mmlu_full_de_public_relations.yaml | 5 ++ .../global_mmlu_full_de_security_studies.yaml | 5 ++ .../de/global_mmlu_full_de_sociology.yaml | 5 ++ ...global_mmlu_full_de_us_foreign_policy.yaml | 5 ++ .../full/de/global_mmlu_full_de_virology.yaml | 5 ++ .../global_mmlu_full_de_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/de/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/el/_el_template_yaml | 16 ++++ .../full/el/_global_mmlu_full_el.yaml | 11 +++ .../el/_global_mmlu_full_el_humanities.yaml | 8 ++ .../full/el/_global_mmlu_full_el_other.yaml | 8 ++ .../_global_mmlu_full_el_social_sciences.yaml | 8 ++ .../full/el/_global_mmlu_full_el_stem.yaml | 8 ++ .../global_mmlu_full_el_abstract_algebra.yaml | 5 ++ .../full/el/global_mmlu_full_el_anatomy.yaml | 5 ++ .../el/global_mmlu_full_el_astronomy.yaml | 5 ++ .../global_mmlu_full_el_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_el_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_el_college_biology.yaml | 5 ++ ...global_mmlu_full_el_college_chemistry.yaml | 5 ++ ...mmlu_full_el_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_el_college_mathematics.yaml | 5 ++ .../global_mmlu_full_el_college_medicine.yaml | 5 ++ 
.../global_mmlu_full_el_college_physics.yaml | 5 ++ ...global_mmlu_full_el_computer_security.yaml | 5 ++ ...lobal_mmlu_full_el_conceptual_physics.yaml | 5 ++ .../el/global_mmlu_full_el_econometrics.yaml | 5 ++ ...l_mmlu_full_el_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_el_elementary_mathematics.yaml | 5 ++ .../el/global_mmlu_full_el_formal_logic.yaml | 5 ++ .../el/global_mmlu_full_el_global_facts.yaml | 5 ++ ...obal_mmlu_full_el_high_school_biology.yaml | 5 ++ ...al_mmlu_full_el_high_school_chemistry.yaml | 5 ++ ..._full_el_high_school_computer_science.yaml | 5 ++ ..._full_el_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_el_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_el_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_el_high_school_mathematics.yaml | 5 ++ ...lu_full_el_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_el_high_school_physics.yaml | 5 ++ ...l_mmlu_full_el_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_el_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_el_high_school_us_history.yaml | 5 ++ ...mlu_full_el_high_school_world_history.yaml | 5 ++ .../el/global_mmlu_full_el_human_aging.yaml | 5 ++ .../global_mmlu_full_el_human_sexuality.yaml | 5 ++ ...global_mmlu_full_el_international_law.yaml | 5 ++ .../el/global_mmlu_full_el_jurisprudence.yaml | 5 ++ ...global_mmlu_full_el_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_el_machine_learning.yaml | 5 ++ .../el/global_mmlu_full_el_management.yaml | 5 ++ .../el/global_mmlu_full_el_marketing.yaml | 5 ++ .../global_mmlu_full_el_medical_genetics.yaml | 5 ++ .../el/global_mmlu_full_el_miscellaneous.yaml | 5 ++ .../global_mmlu_full_el_moral_disputes.yaml | 5 ++ .../global_mmlu_full_el_moral_scenarios.yaml | 5 ++ .../el/global_mmlu_full_el_nutrition.yaml | 5 ++ .../el/global_mmlu_full_el_philosophy.yaml | 5 ++ .../el/global_mmlu_full_el_prehistory.yaml | 5 ++ ..._mmlu_full_el_professional_accounting.yaml | 5 ++ 
.../global_mmlu_full_el_professional_law.yaml | 5 ++ ...al_mmlu_full_el_professional_medicine.yaml | 5 ++ ..._mmlu_full_el_professional_psychology.yaml | 5 ++ .../global_mmlu_full_el_public_relations.yaml | 5 ++ .../global_mmlu_full_el_security_studies.yaml | 5 ++ .../el/global_mmlu_full_el_sociology.yaml | 5 ++ ...global_mmlu_full_el_us_foreign_policy.yaml | 5 ++ .../full/el/global_mmlu_full_el_virology.yaml | 5 ++ .../global_mmlu_full_el_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/el/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/en/_en_template_yaml | 16 ++++ .../full/en/_global_mmlu_full_en.yaml | 11 +++ .../en/_global_mmlu_full_en_humanities.yaml | 8 ++ .../full/en/_global_mmlu_full_en_other.yaml | 8 ++ .../_global_mmlu_full_en_social_sciences.yaml | 8 ++ .../full/en/_global_mmlu_full_en_stem.yaml | 8 ++ .../global_mmlu_full_en_abstract_algebra.yaml | 5 ++ .../full/en/global_mmlu_full_en_anatomy.yaml | 5 ++ .../en/global_mmlu_full_en_astronomy.yaml | 5 ++ .../global_mmlu_full_en_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_en_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_en_college_biology.yaml | 5 ++ ...global_mmlu_full_en_college_chemistry.yaml | 5 ++ ...mmlu_full_en_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_en_college_mathematics.yaml | 5 ++ .../global_mmlu_full_en_college_medicine.yaml | 5 ++ .../global_mmlu_full_en_college_physics.yaml | 5 ++ ...global_mmlu_full_en_computer_security.yaml | 5 ++ ...lobal_mmlu_full_en_conceptual_physics.yaml | 5 ++ .../en/global_mmlu_full_en_econometrics.yaml | 5 ++ ...l_mmlu_full_en_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_en_elementary_mathematics.yaml | 5 ++ .../en/global_mmlu_full_en_formal_logic.yaml | 5 ++ .../en/global_mmlu_full_en_global_facts.yaml | 5 ++ ...obal_mmlu_full_en_high_school_biology.yaml | 5 ++ ...al_mmlu_full_en_high_school_chemistry.yaml | 5 ++ ..._full_en_high_school_computer_science.yaml | 5 ++ ..._full_en_high_school_european_history.yaml | 5 ++ 
...al_mmlu_full_en_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_en_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_en_high_school_mathematics.yaml | 5 ++ ...lu_full_en_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_en_high_school_physics.yaml | 5 ++ ...l_mmlu_full_en_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_en_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_en_high_school_us_history.yaml | 5 ++ ...mlu_full_en_high_school_world_history.yaml | 5 ++ .../en/global_mmlu_full_en_human_aging.yaml | 5 ++ .../global_mmlu_full_en_human_sexuality.yaml | 5 ++ ...global_mmlu_full_en_international_law.yaml | 5 ++ .../en/global_mmlu_full_en_jurisprudence.yaml | 5 ++ ...global_mmlu_full_en_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_en_machine_learning.yaml | 5 ++ .../en/global_mmlu_full_en_management.yaml | 5 ++ .../en/global_mmlu_full_en_marketing.yaml | 5 ++ .../global_mmlu_full_en_medical_genetics.yaml | 5 ++ .../en/global_mmlu_full_en_miscellaneous.yaml | 5 ++ .../global_mmlu_full_en_moral_disputes.yaml | 5 ++ .../global_mmlu_full_en_moral_scenarios.yaml | 5 ++ .../en/global_mmlu_full_en_nutrition.yaml | 5 ++ .../en/global_mmlu_full_en_philosophy.yaml | 5 ++ .../en/global_mmlu_full_en_prehistory.yaml | 5 ++ ..._mmlu_full_en_professional_accounting.yaml | 5 ++ .../global_mmlu_full_en_professional_law.yaml | 5 ++ ...al_mmlu_full_en_professional_medicine.yaml | 5 ++ ..._mmlu_full_en_professional_psychology.yaml | 5 ++ .../global_mmlu_full_en_public_relations.yaml | 5 ++ .../global_mmlu_full_en_security_studies.yaml | 5 ++ .../en/global_mmlu_full_en_sociology.yaml | 5 ++ ...global_mmlu_full_en_us_foreign_policy.yaml | 5 ++ .../full/en/global_mmlu_full_en_virology.yaml | 5 ++ .../global_mmlu_full_en_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/en/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/es/_es_template_yaml | 16 ++++ .../full/es/_global_mmlu_full_es.yaml | 11 +++ 
.../es/_global_mmlu_full_es_humanities.yaml | 8 ++ .../full/es/_global_mmlu_full_es_other.yaml | 8 ++ .../_global_mmlu_full_es_social_sciences.yaml | 8 ++ .../full/es/_global_mmlu_full_es_stem.yaml | 8 ++ .../global_mmlu_full_es_abstract_algebra.yaml | 5 ++ .../full/es/global_mmlu_full_es_anatomy.yaml | 5 ++ .../es/global_mmlu_full_es_astronomy.yaml | 5 ++ .../global_mmlu_full_es_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_es_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_es_college_biology.yaml | 5 ++ ...global_mmlu_full_es_college_chemistry.yaml | 5 ++ ...mmlu_full_es_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_es_college_mathematics.yaml | 5 ++ .../global_mmlu_full_es_college_medicine.yaml | 5 ++ .../global_mmlu_full_es_college_physics.yaml | 5 ++ ...global_mmlu_full_es_computer_security.yaml | 5 ++ ...lobal_mmlu_full_es_conceptual_physics.yaml | 5 ++ .../es/global_mmlu_full_es_econometrics.yaml | 5 ++ ...l_mmlu_full_es_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_es_elementary_mathematics.yaml | 5 ++ .../es/global_mmlu_full_es_formal_logic.yaml | 5 ++ .../es/global_mmlu_full_es_global_facts.yaml | 5 ++ ...obal_mmlu_full_es_high_school_biology.yaml | 5 ++ ...al_mmlu_full_es_high_school_chemistry.yaml | 5 ++ ..._full_es_high_school_computer_science.yaml | 5 ++ ..._full_es_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_es_high_school_geography.yaml | 5 ++ ...s_high_school_government_and_politics.yaml | 5 ++ ...lu_full_es_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_es_high_school_mathematics.yaml | 5 ++ ...lu_full_es_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_es_high_school_physics.yaml | 5 ++ ...l_mmlu_full_es_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_es_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_es_high_school_us_history.yaml | 5 ++ ...mlu_full_es_high_school_world_history.yaml | 5 ++ .../es/global_mmlu_full_es_human_aging.yaml | 5 ++ .../global_mmlu_full_es_human_sexuality.yaml | 5 ++ 
...global_mmlu_full_es_international_law.yaml | 5 ++ .../es/global_mmlu_full_es_jurisprudence.yaml | 5 ++ ...global_mmlu_full_es_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_es_machine_learning.yaml | 5 ++ .../es/global_mmlu_full_es_management.yaml | 5 ++ .../es/global_mmlu_full_es_marketing.yaml | 5 ++ .../global_mmlu_full_es_medical_genetics.yaml | 5 ++ .../es/global_mmlu_full_es_miscellaneous.yaml | 5 ++ .../global_mmlu_full_es_moral_disputes.yaml | 5 ++ .../global_mmlu_full_es_moral_scenarios.yaml | 5 ++ .../es/global_mmlu_full_es_nutrition.yaml | 5 ++ .../es/global_mmlu_full_es_philosophy.yaml | 5 ++ .../es/global_mmlu_full_es_prehistory.yaml | 5 ++ ..._mmlu_full_es_professional_accounting.yaml | 5 ++ .../global_mmlu_full_es_professional_law.yaml | 5 ++ ...al_mmlu_full_es_professional_medicine.yaml | 5 ++ ..._mmlu_full_es_professional_psychology.yaml | 5 ++ .../global_mmlu_full_es_public_relations.yaml | 5 ++ .../global_mmlu_full_es_security_studies.yaml | 5 ++ .../es/global_mmlu_full_es_sociology.yaml | 5 ++ ...global_mmlu_full_es_us_foreign_policy.yaml | 5 ++ .../full/es/global_mmlu_full_es_virology.yaml | 5 ++ .../global_mmlu_full_es_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/es/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/fa/_fa_template_yaml | 16 ++++ .../full/fa/_global_mmlu_full_fa.yaml | 11 +++ .../fa/_global_mmlu_full_fa_humanities.yaml | 8 ++ .../full/fa/_global_mmlu_full_fa_other.yaml | 8 ++ .../_global_mmlu_full_fa_social_sciences.yaml | 8 ++ .../full/fa/_global_mmlu_full_fa_stem.yaml | 8 ++ .../global_mmlu_full_fa_abstract_algebra.yaml | 5 ++ .../full/fa/global_mmlu_full_fa_anatomy.yaml | 5 ++ .../fa/global_mmlu_full_fa_astronomy.yaml | 5 ++ .../global_mmlu_full_fa_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_fa_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fa_college_biology.yaml | 5 ++ ...global_mmlu_full_fa_college_chemistry.yaml | 5 ++ ...mmlu_full_fa_college_computer_science.yaml | 5 ++ 
...obal_mmlu_full_fa_college_mathematics.yaml | 5 ++ .../global_mmlu_full_fa_college_medicine.yaml | 5 ++ .../global_mmlu_full_fa_college_physics.yaml | 5 ++ ...global_mmlu_full_fa_computer_security.yaml | 5 ++ ...lobal_mmlu_full_fa_conceptual_physics.yaml | 5 ++ .../fa/global_mmlu_full_fa_econometrics.yaml | 5 ++ ...l_mmlu_full_fa_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_fa_elementary_mathematics.yaml | 5 ++ .../fa/global_mmlu_full_fa_formal_logic.yaml | 5 ++ .../fa/global_mmlu_full_fa_global_facts.yaml | 5 ++ ...obal_mmlu_full_fa_high_school_biology.yaml | 5 ++ ...al_mmlu_full_fa_high_school_chemistry.yaml | 5 ++ ..._full_fa_high_school_computer_science.yaml | 5 ++ ..._full_fa_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_fa_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_fa_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_fa_high_school_mathematics.yaml | 5 ++ ...lu_full_fa_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_fa_high_school_physics.yaml | 5 ++ ...l_mmlu_full_fa_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_fa_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_fa_high_school_us_history.yaml | 5 ++ ...mlu_full_fa_high_school_world_history.yaml | 5 ++ .../fa/global_mmlu_full_fa_human_aging.yaml | 5 ++ .../global_mmlu_full_fa_human_sexuality.yaml | 5 ++ ...global_mmlu_full_fa_international_law.yaml | 5 ++ .../fa/global_mmlu_full_fa_jurisprudence.yaml | 5 ++ ...global_mmlu_full_fa_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_fa_machine_learning.yaml | 5 ++ .../fa/global_mmlu_full_fa_management.yaml | 5 ++ .../fa/global_mmlu_full_fa_marketing.yaml | 5 ++ .../global_mmlu_full_fa_medical_genetics.yaml | 5 ++ .../fa/global_mmlu_full_fa_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fa_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fa_moral_scenarios.yaml | 5 ++ .../fa/global_mmlu_full_fa_nutrition.yaml | 5 ++ .../fa/global_mmlu_full_fa_philosophy.yaml | 5 ++ 
.../fa/global_mmlu_full_fa_prehistory.yaml | 5 ++ ..._mmlu_full_fa_professional_accounting.yaml | 5 ++ .../global_mmlu_full_fa_professional_law.yaml | 5 ++ ...al_mmlu_full_fa_professional_medicine.yaml | 5 ++ ..._mmlu_full_fa_professional_psychology.yaml | 5 ++ .../global_mmlu_full_fa_public_relations.yaml | 5 ++ .../global_mmlu_full_fa_security_studies.yaml | 5 ++ .../fa/global_mmlu_full_fa_sociology.yaml | 5 ++ ...global_mmlu_full_fa_us_foreign_policy.yaml | 5 ++ .../full/fa/global_mmlu_full_fa_virology.yaml | 5 ++ .../global_mmlu_full_fa_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fa/utils.py | 73 +++++++++++++++++++ .../global_mmlu/full/fil/_fil_template_yaml | 16 ++++ .../full/fil/_global_mmlu_full_fil.yaml | 11 +++ .../fil/_global_mmlu_full_fil_humanities.yaml | 8 ++ .../full/fil/_global_mmlu_full_fil_other.yaml | 8 ++ ..._global_mmlu_full_fil_social_sciences.yaml | 8 ++ .../full/fil/_global_mmlu_full_fil_stem.yaml | 8 ++ ...global_mmlu_full_fil_abstract_algebra.yaml | 5 ++ .../fil/global_mmlu_full_fil_anatomy.yaml | 5 ++ .../fil/global_mmlu_full_fil_astronomy.yaml | 5 ++ .../global_mmlu_full_fil_business_ethics.yaml | 5 ++ ...obal_mmlu_full_fil_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fil_college_biology.yaml | 5 ++ ...lobal_mmlu_full_fil_college_chemistry.yaml | 5 ++ ...mlu_full_fil_college_computer_science.yaml | 5 ++ ...bal_mmlu_full_fil_college_mathematics.yaml | 5 ++ ...global_mmlu_full_fil_college_medicine.yaml | 5 ++ .../global_mmlu_full_fil_college_physics.yaml | 5 ++ ...lobal_mmlu_full_fil_computer_security.yaml | 5 ++ ...obal_mmlu_full_fil_conceptual_physics.yaml | 5 ++ .../global_mmlu_full_fil_econometrics.yaml | 5 ++ ..._mmlu_full_fil_electrical_engineering.yaml | 5 ++ ..._mmlu_full_fil_elementary_mathematics.yaml | 5 ++ .../global_mmlu_full_fil_formal_logic.yaml | 5 ++ .../global_mmlu_full_fil_global_facts.yaml | 5 ++ ...bal_mmlu_full_fil_high_school_biology.yaml | 5 ++ ...l_mmlu_full_fil_high_school_chemistry.yaml | 5 
++ ...full_fil_high_school_computer_science.yaml | 5 ++ ...full_fil_high_school_european_history.yaml | 5 ++ ...l_mmlu_full_fil_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...u_full_fil_high_school_macroeconomics.yaml | 5 ++ ...mmlu_full_fil_high_school_mathematics.yaml | 5 ++ ...u_full_fil_high_school_microeconomics.yaml | 5 ++ ...bal_mmlu_full_fil_high_school_physics.yaml | 5 ++ ..._mmlu_full_fil_high_school_psychology.yaml | 5 ++ ..._mmlu_full_fil_high_school_statistics.yaml | 5 ++ ..._mmlu_full_fil_high_school_us_history.yaml | 5 ++ ...lu_full_fil_high_school_world_history.yaml | 5 ++ .../fil/global_mmlu_full_fil_human_aging.yaml | 5 ++ .../global_mmlu_full_fil_human_sexuality.yaml | 5 ++ ...lobal_mmlu_full_fil_international_law.yaml | 5 ++ .../global_mmlu_full_fil_jurisprudence.yaml | 5 ++ ...lobal_mmlu_full_fil_logical_fallacies.yaml | 5 ++ ...global_mmlu_full_fil_machine_learning.yaml | 5 ++ .../fil/global_mmlu_full_fil_management.yaml | 5 ++ .../fil/global_mmlu_full_fil_marketing.yaml | 5 ++ ...global_mmlu_full_fil_medical_genetics.yaml | 5 ++ .../global_mmlu_full_fil_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fil_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fil_moral_scenarios.yaml | 5 ++ .../fil/global_mmlu_full_fil_nutrition.yaml | 5 ++ .../fil/global_mmlu_full_fil_philosophy.yaml | 5 ++ .../fil/global_mmlu_full_fil_prehistory.yaml | 5 ++ ...mmlu_full_fil_professional_accounting.yaml | 5 ++ ...global_mmlu_full_fil_professional_law.yaml | 5 ++ ...l_mmlu_full_fil_professional_medicine.yaml | 5 ++ ...mmlu_full_fil_professional_psychology.yaml | 5 ++ ...global_mmlu_full_fil_public_relations.yaml | 5 ++ ...global_mmlu_full_fil_security_studies.yaml | 5 ++ .../fil/global_mmlu_full_fil_sociology.yaml | 5 ++ ...lobal_mmlu_full_fil_us_foreign_policy.yaml | 5 ++ .../fil/global_mmlu_full_fil_virology.yaml | 5 ++ .../global_mmlu_full_fil_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fil/utils.py | 73 
+++++++++++++++++++ .../global_mmlu/full/fr/_fr_template_yaml | 16 ++++ .../full/fr/_global_mmlu_full_fr.yaml | 11 +++ .../fr/_global_mmlu_full_fr_humanities.yaml | 8 ++ .../full/fr/_global_mmlu_full_fr_other.yaml | 8 ++ .../_global_mmlu_full_fr_social_sciences.yaml | 8 ++ .../full/fr/_global_mmlu_full_fr_stem.yaml | 8 ++ .../global_mmlu_full_fr_abstract_algebra.yaml | 5 ++ .../full/fr/global_mmlu_full_fr_anatomy.yaml | 5 ++ .../fr/global_mmlu_full_fr_astronomy.yaml | 5 ++ .../global_mmlu_full_fr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_fr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_fr_college_biology.yaml | 5 ++ ...global_mmlu_full_fr_college_chemistry.yaml | 5 ++ ...mmlu_full_fr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_fr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_fr_college_medicine.yaml | 5 ++ .../global_mmlu_full_fr_college_physics.yaml | 5 ++ ...global_mmlu_full_fr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_fr_conceptual_physics.yaml | 5 ++ .../fr/global_mmlu_full_fr_econometrics.yaml | 5 ++ ...l_mmlu_full_fr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_fr_elementary_mathematics.yaml | 5 ++ .../fr/global_mmlu_full_fr_formal_logic.yaml | 5 ++ .../fr/global_mmlu_full_fr_global_facts.yaml | 5 ++ ...obal_mmlu_full_fr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_fr_high_school_chemistry.yaml | 5 ++ ..._full_fr_high_school_computer_science.yaml | 5 ++ ..._full_fr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_fr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_fr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_fr_high_school_mathematics.yaml | 5 ++ ...lu_full_fr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_fr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_fr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_fr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_fr_high_school_us_history.yaml | 5 ++ 
...mlu_full_fr_high_school_world_history.yaml | 5 ++ .../fr/global_mmlu_full_fr_human_aging.yaml | 5 ++ .../global_mmlu_full_fr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_fr_international_law.yaml | 5 ++ .../fr/global_mmlu_full_fr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_fr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_fr_machine_learning.yaml | 5 ++ .../fr/global_mmlu_full_fr_management.yaml | 5 ++ .../fr/global_mmlu_full_fr_marketing.yaml | 5 ++ .../global_mmlu_full_fr_medical_genetics.yaml | 5 ++ .../fr/global_mmlu_full_fr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_fr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_fr_moral_scenarios.yaml | 5 ++ .../fr/global_mmlu_full_fr_nutrition.yaml | 5 ++ .../fr/global_mmlu_full_fr_philosophy.yaml | 5 ++ .../fr/global_mmlu_full_fr_prehistory.yaml | 5 ++ ..._mmlu_full_fr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_fr_professional_law.yaml | 5 ++ ...al_mmlu_full_fr_professional_medicine.yaml | 5 ++ ..._mmlu_full_fr_professional_psychology.yaml | 5 ++ .../global_mmlu_full_fr_public_relations.yaml | 5 ++ .../global_mmlu_full_fr_security_studies.yaml | 5 ++ .../fr/global_mmlu_full_fr_sociology.yaml | 5 ++ ...global_mmlu_full_fr_us_foreign_policy.yaml | 5 ++ .../full/fr/global_mmlu_full_fr_virology.yaml | 5 ++ .../global_mmlu_full_fr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/fr/utils.py | 73 +++++++++++++++++++ .../full/ha/_global_mmlu_full_ha.yaml | 11 +++ .../ha/_global_mmlu_full_ha_humanities.yaml | 8 ++ .../full/ha/_global_mmlu_full_ha_other.yaml | 8 ++ .../_global_mmlu_full_ha_social_sciences.yaml | 8 ++ .../full/ha/_global_mmlu_full_ha_stem.yaml | 8 ++ .../global_mmlu/full/ha/_ha_template_yaml | 16 ++++ .../global_mmlu_full_ha_abstract_algebra.yaml | 5 ++ .../full/ha/global_mmlu_full_ha_anatomy.yaml | 5 ++ .../ha/global_mmlu_full_ha_astronomy.yaml | 5 ++ .../global_mmlu_full_ha_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ha_clinical_knowledge.yaml | 5 ++ 
.../global_mmlu_full_ha_college_biology.yaml | 5 ++ ...global_mmlu_full_ha_college_chemistry.yaml | 5 ++ ...mmlu_full_ha_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ha_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ha_college_medicine.yaml | 5 ++ .../global_mmlu_full_ha_college_physics.yaml | 5 ++ ...global_mmlu_full_ha_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ha_conceptual_physics.yaml | 5 ++ .../ha/global_mmlu_full_ha_econometrics.yaml | 5 ++ ...l_mmlu_full_ha_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ha_elementary_mathematics.yaml | 5 ++ .../ha/global_mmlu_full_ha_formal_logic.yaml | 5 ++ .../ha/global_mmlu_full_ha_global_facts.yaml | 5 ++ ...obal_mmlu_full_ha_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ha_high_school_chemistry.yaml | 5 ++ ..._full_ha_high_school_computer_science.yaml | 5 ++ ..._full_ha_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ha_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ha_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ha_high_school_mathematics.yaml | 5 ++ ...lu_full_ha_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ha_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ha_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ha_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ha_high_school_us_history.yaml | 5 ++ ...mlu_full_ha_high_school_world_history.yaml | 5 ++ .../ha/global_mmlu_full_ha_human_aging.yaml | 5 ++ .../global_mmlu_full_ha_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ha_international_law.yaml | 5 ++ .../ha/global_mmlu_full_ha_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ha_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ha_machine_learning.yaml | 5 ++ .../ha/global_mmlu_full_ha_management.yaml | 5 ++ .../ha/global_mmlu_full_ha_marketing.yaml | 5 ++ .../global_mmlu_full_ha_medical_genetics.yaml | 5 ++ .../ha/global_mmlu_full_ha_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ha_moral_disputes.yaml | 5 ++ 
.../global_mmlu_full_ha_moral_scenarios.yaml | 5 ++ .../ha/global_mmlu_full_ha_nutrition.yaml | 5 ++ .../ha/global_mmlu_full_ha_philosophy.yaml | 5 ++ .../ha/global_mmlu_full_ha_prehistory.yaml | 5 ++ ..._mmlu_full_ha_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ha_professional_law.yaml | 5 ++ ...al_mmlu_full_ha_professional_medicine.yaml | 5 ++ ..._mmlu_full_ha_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ha_public_relations.yaml | 5 ++ .../global_mmlu_full_ha_security_studies.yaml | 5 ++ .../ha/global_mmlu_full_ha_sociology.yaml | 5 ++ ...global_mmlu_full_ha_us_foreign_policy.yaml | 5 ++ .../full/ha/global_mmlu_full_ha_virology.yaml | 5 ++ .../global_mmlu_full_ha_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ha/utils.py | 73 +++++++++++++++++++ .../full/he/_global_mmlu_full_he.yaml | 11 +++ .../he/_global_mmlu_full_he_humanities.yaml | 8 ++ .../full/he/_global_mmlu_full_he_other.yaml | 8 ++ .../_global_mmlu_full_he_social_sciences.yaml | 8 ++ .../full/he/_global_mmlu_full_he_stem.yaml | 8 ++ .../global_mmlu/full/he/_he_template_yaml | 16 ++++ .../global_mmlu_full_he_abstract_algebra.yaml | 5 ++ .../full/he/global_mmlu_full_he_anatomy.yaml | 5 ++ .../he/global_mmlu_full_he_astronomy.yaml | 5 ++ .../global_mmlu_full_he_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_he_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_he_college_biology.yaml | 5 ++ ...global_mmlu_full_he_college_chemistry.yaml | 5 ++ ...mmlu_full_he_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_he_college_mathematics.yaml | 5 ++ .../global_mmlu_full_he_college_medicine.yaml | 5 ++ .../global_mmlu_full_he_college_physics.yaml | 5 ++ ...global_mmlu_full_he_computer_security.yaml | 5 ++ ...lobal_mmlu_full_he_conceptual_physics.yaml | 5 ++ .../he/global_mmlu_full_he_econometrics.yaml | 5 ++ ...l_mmlu_full_he_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_he_elementary_mathematics.yaml | 5 ++ .../he/global_mmlu_full_he_formal_logic.yaml | 5 ++ 
.../he/global_mmlu_full_he_global_facts.yaml | 5 ++ ...obal_mmlu_full_he_high_school_biology.yaml | 5 ++ ...al_mmlu_full_he_high_school_chemistry.yaml | 5 ++ ..._full_he_high_school_computer_science.yaml | 5 ++ ..._full_he_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_he_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_he_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_he_high_school_mathematics.yaml | 5 ++ ...lu_full_he_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_he_high_school_physics.yaml | 5 ++ ...l_mmlu_full_he_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_he_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_he_high_school_us_history.yaml | 5 ++ ...mlu_full_he_high_school_world_history.yaml | 5 ++ .../he/global_mmlu_full_he_human_aging.yaml | 5 ++ .../global_mmlu_full_he_human_sexuality.yaml | 5 ++ ...global_mmlu_full_he_international_law.yaml | 5 ++ .../he/global_mmlu_full_he_jurisprudence.yaml | 5 ++ ...global_mmlu_full_he_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_he_machine_learning.yaml | 5 ++ .../he/global_mmlu_full_he_management.yaml | 5 ++ .../he/global_mmlu_full_he_marketing.yaml | 5 ++ .../global_mmlu_full_he_medical_genetics.yaml | 5 ++ .../he/global_mmlu_full_he_miscellaneous.yaml | 5 ++ .../global_mmlu_full_he_moral_disputes.yaml | 5 ++ .../global_mmlu_full_he_moral_scenarios.yaml | 5 ++ .../he/global_mmlu_full_he_nutrition.yaml | 5 ++ .../he/global_mmlu_full_he_philosophy.yaml | 5 ++ .../he/global_mmlu_full_he_prehistory.yaml | 5 ++ ..._mmlu_full_he_professional_accounting.yaml | 5 ++ .../global_mmlu_full_he_professional_law.yaml | 5 ++ ...al_mmlu_full_he_professional_medicine.yaml | 5 ++ ..._mmlu_full_he_professional_psychology.yaml | 5 ++ .../global_mmlu_full_he_public_relations.yaml | 5 ++ .../global_mmlu_full_he_security_studies.yaml | 5 ++ .../he/global_mmlu_full_he_sociology.yaml | 5 ++ ...global_mmlu_full_he_us_foreign_policy.yaml | 5 ++ 
.../full/he/global_mmlu_full_he_virology.yaml | 5 ++ .../global_mmlu_full_he_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/he/utils.py | 73 +++++++++++++++++++ .../full/hi/_global_mmlu_full_hi.yaml | 11 +++ .../hi/_global_mmlu_full_hi_humanities.yaml | 8 ++ .../full/hi/_global_mmlu_full_hi_other.yaml | 8 ++ .../_global_mmlu_full_hi_social_sciences.yaml | 8 ++ .../full/hi/_global_mmlu_full_hi_stem.yaml | 8 ++ .../global_mmlu/full/hi/_hi_template_yaml | 16 ++++ .../global_mmlu_full_hi_abstract_algebra.yaml | 5 ++ .../full/hi/global_mmlu_full_hi_anatomy.yaml | 5 ++ .../hi/global_mmlu_full_hi_astronomy.yaml | 5 ++ .../global_mmlu_full_hi_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_hi_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_hi_college_biology.yaml | 5 ++ ...global_mmlu_full_hi_college_chemistry.yaml | 5 ++ ...mmlu_full_hi_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_hi_college_mathematics.yaml | 5 ++ .../global_mmlu_full_hi_college_medicine.yaml | 5 ++ .../global_mmlu_full_hi_college_physics.yaml | 5 ++ ...global_mmlu_full_hi_computer_security.yaml | 5 ++ ...lobal_mmlu_full_hi_conceptual_physics.yaml | 5 ++ .../hi/global_mmlu_full_hi_econometrics.yaml | 5 ++ ...l_mmlu_full_hi_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_hi_elementary_mathematics.yaml | 5 ++ .../hi/global_mmlu_full_hi_formal_logic.yaml | 5 ++ .../hi/global_mmlu_full_hi_global_facts.yaml | 5 ++ ...obal_mmlu_full_hi_high_school_biology.yaml | 5 ++ ...al_mmlu_full_hi_high_school_chemistry.yaml | 5 ++ ..._full_hi_high_school_computer_science.yaml | 5 ++ ..._full_hi_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_hi_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_hi_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_hi_high_school_mathematics.yaml | 5 ++ ...lu_full_hi_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_hi_high_school_physics.yaml | 5 ++ ...l_mmlu_full_hi_high_school_psychology.yaml | 5 
++ ...l_mmlu_full_hi_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_hi_high_school_us_history.yaml | 5 ++ ...mlu_full_hi_high_school_world_history.yaml | 5 ++ .../hi/global_mmlu_full_hi_human_aging.yaml | 5 ++ .../global_mmlu_full_hi_human_sexuality.yaml | 5 ++ ...global_mmlu_full_hi_international_law.yaml | 5 ++ .../hi/global_mmlu_full_hi_jurisprudence.yaml | 5 ++ ...global_mmlu_full_hi_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_hi_machine_learning.yaml | 5 ++ .../hi/global_mmlu_full_hi_management.yaml | 5 ++ .../hi/global_mmlu_full_hi_marketing.yaml | 5 ++ .../global_mmlu_full_hi_medical_genetics.yaml | 5 ++ .../hi/global_mmlu_full_hi_miscellaneous.yaml | 5 ++ .../global_mmlu_full_hi_moral_disputes.yaml | 5 ++ .../global_mmlu_full_hi_moral_scenarios.yaml | 5 ++ .../hi/global_mmlu_full_hi_nutrition.yaml | 5 ++ .../hi/global_mmlu_full_hi_philosophy.yaml | 5 ++ .../hi/global_mmlu_full_hi_prehistory.yaml | 5 ++ ..._mmlu_full_hi_professional_accounting.yaml | 5 ++ .../global_mmlu_full_hi_professional_law.yaml | 5 ++ ...al_mmlu_full_hi_professional_medicine.yaml | 5 ++ ..._mmlu_full_hi_professional_psychology.yaml | 5 ++ .../global_mmlu_full_hi_public_relations.yaml | 5 ++ .../global_mmlu_full_hi_security_studies.yaml | 5 ++ .../hi/global_mmlu_full_hi_sociology.yaml | 5 ++ ...global_mmlu_full_hi_us_foreign_policy.yaml | 5 ++ .../full/hi/global_mmlu_full_hi_virology.yaml | 5 ++ .../global_mmlu_full_hi_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/hi/utils.py | 73 +++++++++++++++++++ .../full/id/_global_mmlu_full_id.yaml | 11 +++ .../id/_global_mmlu_full_id_humanities.yaml | 8 ++ .../full/id/_global_mmlu_full_id_other.yaml | 8 ++ .../_global_mmlu_full_id_social_sciences.yaml | 8 ++ .../full/id/_global_mmlu_full_id_stem.yaml | 8 ++ .../global_mmlu/full/id/_id_template_yaml | 16 ++++ .../global_mmlu_full_id_abstract_algebra.yaml | 5 ++ .../full/id/global_mmlu_full_id_anatomy.yaml | 5 ++ .../id/global_mmlu_full_id_astronomy.yaml | 5 ++ 
.../global_mmlu_full_id_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_id_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_id_college_biology.yaml | 5 ++ ...global_mmlu_full_id_college_chemistry.yaml | 5 ++ ...mmlu_full_id_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_id_college_mathematics.yaml | 5 ++ .../global_mmlu_full_id_college_medicine.yaml | 5 ++ .../global_mmlu_full_id_college_physics.yaml | 5 ++ ...global_mmlu_full_id_computer_security.yaml | 5 ++ ...lobal_mmlu_full_id_conceptual_physics.yaml | 5 ++ .../id/global_mmlu_full_id_econometrics.yaml | 5 ++ ...l_mmlu_full_id_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_id_elementary_mathematics.yaml | 5 ++ .../id/global_mmlu_full_id_formal_logic.yaml | 5 ++ .../id/global_mmlu_full_id_global_facts.yaml | 5 ++ ...obal_mmlu_full_id_high_school_biology.yaml | 5 ++ ...al_mmlu_full_id_high_school_chemistry.yaml | 5 ++ ..._full_id_high_school_computer_science.yaml | 5 ++ ..._full_id_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_id_high_school_geography.yaml | 5 ++ ...d_high_school_government_and_politics.yaml | 5 ++ ...lu_full_id_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_id_high_school_mathematics.yaml | 5 ++ ...lu_full_id_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_id_high_school_physics.yaml | 5 ++ ...l_mmlu_full_id_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_id_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_id_high_school_us_history.yaml | 5 ++ ...mlu_full_id_high_school_world_history.yaml | 5 ++ .../id/global_mmlu_full_id_human_aging.yaml | 5 ++ .../global_mmlu_full_id_human_sexuality.yaml | 5 ++ ...global_mmlu_full_id_international_law.yaml | 5 ++ .../id/global_mmlu_full_id_jurisprudence.yaml | 5 ++ ...global_mmlu_full_id_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_id_machine_learning.yaml | 5 ++ .../id/global_mmlu_full_id_management.yaml | 5 ++ .../id/global_mmlu_full_id_marketing.yaml | 5 ++ .../global_mmlu_full_id_medical_genetics.yaml | 5 ++ 
.../id/global_mmlu_full_id_miscellaneous.yaml | 5 ++ .../global_mmlu_full_id_moral_disputes.yaml | 5 ++ .../global_mmlu_full_id_moral_scenarios.yaml | 5 ++ .../id/global_mmlu_full_id_nutrition.yaml | 5 ++ .../id/global_mmlu_full_id_philosophy.yaml | 5 ++ .../id/global_mmlu_full_id_prehistory.yaml | 5 ++ ..._mmlu_full_id_professional_accounting.yaml | 5 ++ .../global_mmlu_full_id_professional_law.yaml | 5 ++ ...al_mmlu_full_id_professional_medicine.yaml | 5 ++ ..._mmlu_full_id_professional_psychology.yaml | 5 ++ .../global_mmlu_full_id_public_relations.yaml | 5 ++ .../global_mmlu_full_id_security_studies.yaml | 5 ++ .../id/global_mmlu_full_id_sociology.yaml | 5 ++ ...global_mmlu_full_id_us_foreign_policy.yaml | 5 ++ .../full/id/global_mmlu_full_id_virology.yaml | 5 ++ .../global_mmlu_full_id_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/id/utils.py | 73 +++++++++++++++++++ .../full/ig/_global_mmlu_full_ig.yaml | 11 +++ .../ig/_global_mmlu_full_ig_humanities.yaml | 8 ++ .../full/ig/_global_mmlu_full_ig_other.yaml | 8 ++ .../_global_mmlu_full_ig_social_sciences.yaml | 8 ++ .../full/ig/_global_mmlu_full_ig_stem.yaml | 8 ++ .../global_mmlu/full/ig/_ig_template_yaml | 16 ++++ .../global_mmlu_full_ig_abstract_algebra.yaml | 5 ++ .../full/ig/global_mmlu_full_ig_anatomy.yaml | 5 ++ .../ig/global_mmlu_full_ig_astronomy.yaml | 5 ++ .../global_mmlu_full_ig_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ig_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ig_college_biology.yaml | 5 ++ ...global_mmlu_full_ig_college_chemistry.yaml | 5 ++ ...mmlu_full_ig_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ig_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ig_college_medicine.yaml | 5 ++ .../global_mmlu_full_ig_college_physics.yaml | 5 ++ ...global_mmlu_full_ig_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ig_conceptual_physics.yaml | 5 ++ .../ig/global_mmlu_full_ig_econometrics.yaml | 5 ++ ...l_mmlu_full_ig_electrical_engineering.yaml | 5 ++ 
...l_mmlu_full_ig_elementary_mathematics.yaml | 5 ++ .../ig/global_mmlu_full_ig_formal_logic.yaml | 5 ++ .../ig/global_mmlu_full_ig_global_facts.yaml | 5 ++ ...obal_mmlu_full_ig_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ig_high_school_chemistry.yaml | 5 ++ ..._full_ig_high_school_computer_science.yaml | 5 ++ ..._full_ig_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ig_high_school_geography.yaml | 5 ++ ...g_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ig_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ig_high_school_mathematics.yaml | 5 ++ ...lu_full_ig_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ig_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ig_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ig_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ig_high_school_us_history.yaml | 5 ++ ...mlu_full_ig_high_school_world_history.yaml | 5 ++ .../ig/global_mmlu_full_ig_human_aging.yaml | 5 ++ .../global_mmlu_full_ig_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ig_international_law.yaml | 5 ++ .../ig/global_mmlu_full_ig_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ig_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ig_machine_learning.yaml | 5 ++ .../ig/global_mmlu_full_ig_management.yaml | 5 ++ .../ig/global_mmlu_full_ig_marketing.yaml | 5 ++ .../global_mmlu_full_ig_medical_genetics.yaml | 5 ++ .../ig/global_mmlu_full_ig_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ig_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ig_moral_scenarios.yaml | 5 ++ .../ig/global_mmlu_full_ig_nutrition.yaml | 5 ++ .../ig/global_mmlu_full_ig_philosophy.yaml | 5 ++ .../ig/global_mmlu_full_ig_prehistory.yaml | 5 ++ ..._mmlu_full_ig_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ig_professional_law.yaml | 5 ++ ...al_mmlu_full_ig_professional_medicine.yaml | 5 ++ ..._mmlu_full_ig_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ig_public_relations.yaml | 5 ++ .../global_mmlu_full_ig_security_studies.yaml | 5 ++ 
.../ig/global_mmlu_full_ig_sociology.yaml | 5 ++ ...global_mmlu_full_ig_us_foreign_policy.yaml | 5 ++ .../full/ig/global_mmlu_full_ig_virology.yaml | 5 ++ .../global_mmlu_full_ig_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ig/utils.py | 73 +++++++++++++++++++ .../full/it/_global_mmlu_full_it.yaml | 11 +++ .../it/_global_mmlu_full_it_humanities.yaml | 8 ++ .../full/it/_global_mmlu_full_it_other.yaml | 8 ++ .../_global_mmlu_full_it_social_sciences.yaml | 8 ++ .../full/it/_global_mmlu_full_it_stem.yaml | 8 ++ .../global_mmlu/full/it/_it_template_yaml | 16 ++++ .../global_mmlu_full_it_abstract_algebra.yaml | 5 ++ .../full/it/global_mmlu_full_it_anatomy.yaml | 5 ++ .../it/global_mmlu_full_it_astronomy.yaml | 5 ++ .../global_mmlu_full_it_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_it_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_it_college_biology.yaml | 5 ++ ...global_mmlu_full_it_college_chemistry.yaml | 5 ++ ...mmlu_full_it_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_it_college_mathematics.yaml | 5 ++ .../global_mmlu_full_it_college_medicine.yaml | 5 ++ .../global_mmlu_full_it_college_physics.yaml | 5 ++ ...global_mmlu_full_it_computer_security.yaml | 5 ++ ...lobal_mmlu_full_it_conceptual_physics.yaml | 5 ++ .../it/global_mmlu_full_it_econometrics.yaml | 5 ++ ...l_mmlu_full_it_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_it_elementary_mathematics.yaml | 5 ++ .../it/global_mmlu_full_it_formal_logic.yaml | 5 ++ .../it/global_mmlu_full_it_global_facts.yaml | 5 ++ ...obal_mmlu_full_it_high_school_biology.yaml | 5 ++ ...al_mmlu_full_it_high_school_chemistry.yaml | 5 ++ ..._full_it_high_school_computer_science.yaml | 5 ++ ..._full_it_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_it_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_it_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_it_high_school_mathematics.yaml | 5 ++ ...lu_full_it_high_school_microeconomics.yaml | 5 ++ 
...obal_mmlu_full_it_high_school_physics.yaml | 5 ++ ...l_mmlu_full_it_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_it_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_it_high_school_us_history.yaml | 5 ++ ...mlu_full_it_high_school_world_history.yaml | 5 ++ .../it/global_mmlu_full_it_human_aging.yaml | 5 ++ .../global_mmlu_full_it_human_sexuality.yaml | 5 ++ ...global_mmlu_full_it_international_law.yaml | 5 ++ .../it/global_mmlu_full_it_jurisprudence.yaml | 5 ++ ...global_mmlu_full_it_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_it_machine_learning.yaml | 5 ++ .../it/global_mmlu_full_it_management.yaml | 5 ++ .../it/global_mmlu_full_it_marketing.yaml | 5 ++ .../global_mmlu_full_it_medical_genetics.yaml | 5 ++ .../it/global_mmlu_full_it_miscellaneous.yaml | 5 ++ .../global_mmlu_full_it_moral_disputes.yaml | 5 ++ .../global_mmlu_full_it_moral_scenarios.yaml | 5 ++ .../it/global_mmlu_full_it_nutrition.yaml | 5 ++ .../it/global_mmlu_full_it_philosophy.yaml | 5 ++ .../it/global_mmlu_full_it_prehistory.yaml | 5 ++ ..._mmlu_full_it_professional_accounting.yaml | 5 ++ .../global_mmlu_full_it_professional_law.yaml | 5 ++ ...al_mmlu_full_it_professional_medicine.yaml | 5 ++ ..._mmlu_full_it_professional_psychology.yaml | 5 ++ .../global_mmlu_full_it_public_relations.yaml | 5 ++ .../global_mmlu_full_it_security_studies.yaml | 5 ++ .../it/global_mmlu_full_it_sociology.yaml | 5 ++ ...global_mmlu_full_it_us_foreign_policy.yaml | 5 ++ .../full/it/global_mmlu_full_it_virology.yaml | 5 ++ .../global_mmlu_full_it_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/it/utils.py | 73 +++++++++++++++++++ .../full/ja/_global_mmlu_full_ja.yaml | 11 +++ .../ja/_global_mmlu_full_ja_humanities.yaml | 8 ++ .../full/ja/_global_mmlu_full_ja_other.yaml | 8 ++ .../_global_mmlu_full_ja_social_sciences.yaml | 8 ++ .../full/ja/_global_mmlu_full_ja_stem.yaml | 8 ++ .../global_mmlu/full/ja/_ja_template_yaml | 16 ++++ .../global_mmlu_full_ja_abstract_algebra.yaml | 5 ++ 
.../full/ja/global_mmlu_full_ja_anatomy.yaml | 5 ++ .../ja/global_mmlu_full_ja_astronomy.yaml | 5 ++ .../global_mmlu_full_ja_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ja_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ja_college_biology.yaml | 5 ++ ...global_mmlu_full_ja_college_chemistry.yaml | 5 ++ ...mmlu_full_ja_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ja_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ja_college_medicine.yaml | 5 ++ .../global_mmlu_full_ja_college_physics.yaml | 5 ++ ...global_mmlu_full_ja_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ja_conceptual_physics.yaml | 5 ++ .../ja/global_mmlu_full_ja_econometrics.yaml | 5 ++ ...l_mmlu_full_ja_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ja_elementary_mathematics.yaml | 5 ++ .../ja/global_mmlu_full_ja_formal_logic.yaml | 5 ++ .../ja/global_mmlu_full_ja_global_facts.yaml | 5 ++ ...obal_mmlu_full_ja_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ja_high_school_chemistry.yaml | 5 ++ ..._full_ja_high_school_computer_science.yaml | 5 ++ ..._full_ja_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ja_high_school_geography.yaml | 5 ++ ...a_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ja_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ja_high_school_mathematics.yaml | 5 ++ ...lu_full_ja_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ja_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ja_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ja_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ja_high_school_us_history.yaml | 5 ++ ...mlu_full_ja_high_school_world_history.yaml | 5 ++ .../ja/global_mmlu_full_ja_human_aging.yaml | 5 ++ .../global_mmlu_full_ja_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ja_international_law.yaml | 5 ++ .../ja/global_mmlu_full_ja_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ja_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ja_machine_learning.yaml | 5 ++ .../ja/global_mmlu_full_ja_management.yaml | 5 ++ 
.../ja/global_mmlu_full_ja_marketing.yaml | 5 ++ .../global_mmlu_full_ja_medical_genetics.yaml | 5 ++ .../ja/global_mmlu_full_ja_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ja_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ja_moral_scenarios.yaml | 5 ++ .../ja/global_mmlu_full_ja_nutrition.yaml | 5 ++ .../ja/global_mmlu_full_ja_philosophy.yaml | 5 ++ .../ja/global_mmlu_full_ja_prehistory.yaml | 5 ++ ..._mmlu_full_ja_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ja_professional_law.yaml | 5 ++ ...al_mmlu_full_ja_professional_medicine.yaml | 5 ++ ..._mmlu_full_ja_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ja_public_relations.yaml | 5 ++ .../global_mmlu_full_ja_security_studies.yaml | 5 ++ .../ja/global_mmlu_full_ja_sociology.yaml | 5 ++ ...global_mmlu_full_ja_us_foreign_policy.yaml | 5 ++ .../full/ja/global_mmlu_full_ja_virology.yaml | 5 ++ .../global_mmlu_full_ja_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ja/utils.py | 73 +++++++++++++++++++ .../full/ko/_global_mmlu_full_ko.yaml | 11 +++ .../ko/_global_mmlu_full_ko_humanities.yaml | 8 ++ .../full/ko/_global_mmlu_full_ko_other.yaml | 8 ++ .../_global_mmlu_full_ko_social_sciences.yaml | 8 ++ .../full/ko/_global_mmlu_full_ko_stem.yaml | 8 ++ .../global_mmlu/full/ko/_ko_template_yaml | 16 ++++ .../global_mmlu_full_ko_abstract_algebra.yaml | 5 ++ .../full/ko/global_mmlu_full_ko_anatomy.yaml | 5 ++ .../ko/global_mmlu_full_ko_astronomy.yaml | 5 ++ .../global_mmlu_full_ko_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ko_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ko_college_biology.yaml | 5 ++ ...global_mmlu_full_ko_college_chemistry.yaml | 5 ++ ...mmlu_full_ko_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ko_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ko_college_medicine.yaml | 5 ++ .../global_mmlu_full_ko_college_physics.yaml | 5 ++ ...global_mmlu_full_ko_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ko_conceptual_physics.yaml | 5 ++ 
.../ko/global_mmlu_full_ko_econometrics.yaml | 5 ++ ...l_mmlu_full_ko_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ko_elementary_mathematics.yaml | 5 ++ .../ko/global_mmlu_full_ko_formal_logic.yaml | 5 ++ .../ko/global_mmlu_full_ko_global_facts.yaml | 5 ++ ...obal_mmlu_full_ko_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ko_high_school_chemistry.yaml | 5 ++ ..._full_ko_high_school_computer_science.yaml | 5 ++ ..._full_ko_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ko_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ko_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ko_high_school_mathematics.yaml | 5 ++ ...lu_full_ko_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ko_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ko_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ko_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ko_high_school_us_history.yaml | 5 ++ ...mlu_full_ko_high_school_world_history.yaml | 5 ++ .../ko/global_mmlu_full_ko_human_aging.yaml | 5 ++ .../global_mmlu_full_ko_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ko_international_law.yaml | 5 ++ .../ko/global_mmlu_full_ko_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ko_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ko_machine_learning.yaml | 5 ++ .../ko/global_mmlu_full_ko_management.yaml | 5 ++ .../ko/global_mmlu_full_ko_marketing.yaml | 5 ++ .../global_mmlu_full_ko_medical_genetics.yaml | 5 ++ .../ko/global_mmlu_full_ko_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ko_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ko_moral_scenarios.yaml | 5 ++ .../ko/global_mmlu_full_ko_nutrition.yaml | 5 ++ .../ko/global_mmlu_full_ko_philosophy.yaml | 5 ++ .../ko/global_mmlu_full_ko_prehistory.yaml | 5 ++ ..._mmlu_full_ko_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ko_professional_law.yaml | 5 ++ ...al_mmlu_full_ko_professional_medicine.yaml | 5 ++ ..._mmlu_full_ko_professional_psychology.yaml | 5 ++ 
.../global_mmlu_full_ko_public_relations.yaml | 5 ++ .../global_mmlu_full_ko_security_studies.yaml | 5 ++ .../ko/global_mmlu_full_ko_sociology.yaml | 5 ++ ...global_mmlu_full_ko_us_foreign_policy.yaml | 5 ++ .../full/ko/global_mmlu_full_ko_virology.yaml | 5 ++ .../global_mmlu_full_ko_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ko/utils.py | 73 +++++++++++++++++++ .../full/ky/_global_mmlu_full_ky.yaml | 11 +++ .../ky/_global_mmlu_full_ky_humanities.yaml | 8 ++ .../full/ky/_global_mmlu_full_ky_other.yaml | 8 ++ .../_global_mmlu_full_ky_social_sciences.yaml | 8 ++ .../full/ky/_global_mmlu_full_ky_stem.yaml | 8 ++ .../global_mmlu/full/ky/_ky_template_yaml | 16 ++++ .../global_mmlu_full_ky_abstract_algebra.yaml | 5 ++ .../full/ky/global_mmlu_full_ky_anatomy.yaml | 5 ++ .../ky/global_mmlu_full_ky_astronomy.yaml | 5 ++ .../global_mmlu_full_ky_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ky_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ky_college_biology.yaml | 5 ++ ...global_mmlu_full_ky_college_chemistry.yaml | 5 ++ ...mmlu_full_ky_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ky_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ky_college_medicine.yaml | 5 ++ .../global_mmlu_full_ky_college_physics.yaml | 5 ++ ...global_mmlu_full_ky_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ky_conceptual_physics.yaml | 5 ++ .../ky/global_mmlu_full_ky_econometrics.yaml | 5 ++ ...l_mmlu_full_ky_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ky_elementary_mathematics.yaml | 5 ++ .../ky/global_mmlu_full_ky_formal_logic.yaml | 5 ++ .../ky/global_mmlu_full_ky_global_facts.yaml | 5 ++ ...obal_mmlu_full_ky_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ky_high_school_chemistry.yaml | 5 ++ ..._full_ky_high_school_computer_science.yaml | 5 ++ ..._full_ky_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ky_high_school_geography.yaml | 5 ++ ...y_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ky_high_school_macroeconomics.yaml | 5 ++ 
..._mmlu_full_ky_high_school_mathematics.yaml | 5 ++ ...lu_full_ky_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ky_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ky_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ky_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ky_high_school_us_history.yaml | 5 ++ ...mlu_full_ky_high_school_world_history.yaml | 5 ++ .../ky/global_mmlu_full_ky_human_aging.yaml | 5 ++ .../global_mmlu_full_ky_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ky_international_law.yaml | 5 ++ .../ky/global_mmlu_full_ky_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ky_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ky_machine_learning.yaml | 5 ++ .../ky/global_mmlu_full_ky_management.yaml | 5 ++ .../ky/global_mmlu_full_ky_marketing.yaml | 5 ++ .../global_mmlu_full_ky_medical_genetics.yaml | 5 ++ .../ky/global_mmlu_full_ky_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ky_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ky_moral_scenarios.yaml | 5 ++ .../ky/global_mmlu_full_ky_nutrition.yaml | 5 ++ .../ky/global_mmlu_full_ky_philosophy.yaml | 5 ++ .../ky/global_mmlu_full_ky_prehistory.yaml | 5 ++ ..._mmlu_full_ky_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ky_professional_law.yaml | 5 ++ ...al_mmlu_full_ky_professional_medicine.yaml | 5 ++ ..._mmlu_full_ky_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ky_public_relations.yaml | 5 ++ .../global_mmlu_full_ky_security_studies.yaml | 5 ++ .../ky/global_mmlu_full_ky_sociology.yaml | 5 ++ ...global_mmlu_full_ky_us_foreign_policy.yaml | 5 ++ .../full/ky/global_mmlu_full_ky_virology.yaml | 5 ++ .../global_mmlu_full_ky_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ky/utils.py | 73 +++++++++++++++++++ .../full/lt/_global_mmlu_full_lt.yaml | 11 +++ .../lt/_global_mmlu_full_lt_humanities.yaml | 8 ++ .../full/lt/_global_mmlu_full_lt_other.yaml | 8 ++ .../_global_mmlu_full_lt_social_sciences.yaml | 8 ++ .../full/lt/_global_mmlu_full_lt_stem.yaml | 8 ++ 
.../global_mmlu/full/lt/_lt_template_yaml | 16 ++++ .../global_mmlu_full_lt_abstract_algebra.yaml | 5 ++ .../full/lt/global_mmlu_full_lt_anatomy.yaml | 5 ++ .../lt/global_mmlu_full_lt_astronomy.yaml | 5 ++ .../global_mmlu_full_lt_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_lt_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_lt_college_biology.yaml | 5 ++ ...global_mmlu_full_lt_college_chemistry.yaml | 5 ++ ...mmlu_full_lt_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_lt_college_mathematics.yaml | 5 ++ .../global_mmlu_full_lt_college_medicine.yaml | 5 ++ .../global_mmlu_full_lt_college_physics.yaml | 5 ++ ...global_mmlu_full_lt_computer_security.yaml | 5 ++ ...lobal_mmlu_full_lt_conceptual_physics.yaml | 5 ++ .../lt/global_mmlu_full_lt_econometrics.yaml | 5 ++ ...l_mmlu_full_lt_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_lt_elementary_mathematics.yaml | 5 ++ .../lt/global_mmlu_full_lt_formal_logic.yaml | 5 ++ .../lt/global_mmlu_full_lt_global_facts.yaml | 5 ++ ...obal_mmlu_full_lt_high_school_biology.yaml | 5 ++ ...al_mmlu_full_lt_high_school_chemistry.yaml | 5 ++ ..._full_lt_high_school_computer_science.yaml | 5 ++ ..._full_lt_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_lt_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_lt_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_lt_high_school_mathematics.yaml | 5 ++ ...lu_full_lt_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_lt_high_school_physics.yaml | 5 ++ ...l_mmlu_full_lt_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_lt_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_lt_high_school_us_history.yaml | 5 ++ ...mlu_full_lt_high_school_world_history.yaml | 5 ++ .../lt/global_mmlu_full_lt_human_aging.yaml | 5 ++ .../global_mmlu_full_lt_human_sexuality.yaml | 5 ++ ...global_mmlu_full_lt_international_law.yaml | 5 ++ .../lt/global_mmlu_full_lt_jurisprudence.yaml | 5 ++ ...global_mmlu_full_lt_logical_fallacies.yaml | 5 ++ 
.../global_mmlu_full_lt_machine_learning.yaml | 5 ++ .../lt/global_mmlu_full_lt_management.yaml | 5 ++ .../lt/global_mmlu_full_lt_marketing.yaml | 5 ++ .../global_mmlu_full_lt_medical_genetics.yaml | 5 ++ .../lt/global_mmlu_full_lt_miscellaneous.yaml | 5 ++ .../global_mmlu_full_lt_moral_disputes.yaml | 5 ++ .../global_mmlu_full_lt_moral_scenarios.yaml | 5 ++ .../lt/global_mmlu_full_lt_nutrition.yaml | 5 ++ .../lt/global_mmlu_full_lt_philosophy.yaml | 5 ++ .../lt/global_mmlu_full_lt_prehistory.yaml | 5 ++ ..._mmlu_full_lt_professional_accounting.yaml | 5 ++ .../global_mmlu_full_lt_professional_law.yaml | 5 ++ ...al_mmlu_full_lt_professional_medicine.yaml | 5 ++ ..._mmlu_full_lt_professional_psychology.yaml | 5 ++ .../global_mmlu_full_lt_public_relations.yaml | 5 ++ .../global_mmlu_full_lt_security_studies.yaml | 5 ++ .../lt/global_mmlu_full_lt_sociology.yaml | 5 ++ ...global_mmlu_full_lt_us_foreign_policy.yaml | 5 ++ .../full/lt/global_mmlu_full_lt_virology.yaml | 5 ++ .../global_mmlu_full_lt_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/lt/utils.py | 73 +++++++++++++++++++ .../full/mg/_global_mmlu_full_mg.yaml | 11 +++ .../mg/_global_mmlu_full_mg_humanities.yaml | 8 ++ .../full/mg/_global_mmlu_full_mg_other.yaml | 8 ++ .../_global_mmlu_full_mg_social_sciences.yaml | 8 ++ .../full/mg/_global_mmlu_full_mg_stem.yaml | 8 ++ .../global_mmlu/full/mg/_mg_template_yaml | 16 ++++ .../global_mmlu_full_mg_abstract_algebra.yaml | 5 ++ .../full/mg/global_mmlu_full_mg_anatomy.yaml | 5 ++ .../mg/global_mmlu_full_mg_astronomy.yaml | 5 ++ .../global_mmlu_full_mg_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_mg_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_mg_college_biology.yaml | 5 ++ ...global_mmlu_full_mg_college_chemistry.yaml | 5 ++ ...mmlu_full_mg_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_mg_college_mathematics.yaml | 5 ++ .../global_mmlu_full_mg_college_medicine.yaml | 5 ++ .../global_mmlu_full_mg_college_physics.yaml | 5 ++ 
...global_mmlu_full_mg_computer_security.yaml | 5 ++ ...lobal_mmlu_full_mg_conceptual_physics.yaml | 5 ++ .../mg/global_mmlu_full_mg_econometrics.yaml | 5 ++ ...l_mmlu_full_mg_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_mg_elementary_mathematics.yaml | 5 ++ .../mg/global_mmlu_full_mg_formal_logic.yaml | 5 ++ .../mg/global_mmlu_full_mg_global_facts.yaml | 5 ++ ...obal_mmlu_full_mg_high_school_biology.yaml | 5 ++ ...al_mmlu_full_mg_high_school_chemistry.yaml | 5 ++ ..._full_mg_high_school_computer_science.yaml | 5 ++ ..._full_mg_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_mg_high_school_geography.yaml | 5 ++ ...g_high_school_government_and_politics.yaml | 5 ++ ...lu_full_mg_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_mg_high_school_mathematics.yaml | 5 ++ ...lu_full_mg_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_mg_high_school_physics.yaml | 5 ++ ...l_mmlu_full_mg_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_mg_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_mg_high_school_us_history.yaml | 5 ++ ...mlu_full_mg_high_school_world_history.yaml | 5 ++ .../mg/global_mmlu_full_mg_human_aging.yaml | 5 ++ .../global_mmlu_full_mg_human_sexuality.yaml | 5 ++ ...global_mmlu_full_mg_international_law.yaml | 5 ++ .../mg/global_mmlu_full_mg_jurisprudence.yaml | 5 ++ ...global_mmlu_full_mg_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_mg_machine_learning.yaml | 5 ++ .../mg/global_mmlu_full_mg_management.yaml | 5 ++ .../mg/global_mmlu_full_mg_marketing.yaml | 5 ++ .../global_mmlu_full_mg_medical_genetics.yaml | 5 ++ .../mg/global_mmlu_full_mg_miscellaneous.yaml | 5 ++ .../global_mmlu_full_mg_moral_disputes.yaml | 5 ++ .../global_mmlu_full_mg_moral_scenarios.yaml | 5 ++ .../mg/global_mmlu_full_mg_nutrition.yaml | 5 ++ .../mg/global_mmlu_full_mg_philosophy.yaml | 5 ++ .../mg/global_mmlu_full_mg_prehistory.yaml | 5 ++ ..._mmlu_full_mg_professional_accounting.yaml | 5 ++ .../global_mmlu_full_mg_professional_law.yaml | 5 ++ 
...al_mmlu_full_mg_professional_medicine.yaml | 5 ++ ..._mmlu_full_mg_professional_psychology.yaml | 5 ++ .../global_mmlu_full_mg_public_relations.yaml | 5 ++ .../global_mmlu_full_mg_security_studies.yaml | 5 ++ .../mg/global_mmlu_full_mg_sociology.yaml | 5 ++ ...global_mmlu_full_mg_us_foreign_policy.yaml | 5 ++ .../full/mg/global_mmlu_full_mg_virology.yaml | 5 ++ .../global_mmlu_full_mg_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/mg/utils.py | 73 +++++++++++++++++++ .../full/ms/_global_mmlu_full_ms.yaml | 11 +++ .../ms/_global_mmlu_full_ms_humanities.yaml | 8 ++ .../full/ms/_global_mmlu_full_ms_other.yaml | 8 ++ .../_global_mmlu_full_ms_social_sciences.yaml | 8 ++ .../full/ms/_global_mmlu_full_ms_stem.yaml | 8 ++ .../global_mmlu/full/ms/_ms_template_yaml | 16 ++++ .../global_mmlu_full_ms_abstract_algebra.yaml | 5 ++ .../full/ms/global_mmlu_full_ms_anatomy.yaml | 5 ++ .../ms/global_mmlu_full_ms_astronomy.yaml | 5 ++ .../global_mmlu_full_ms_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ms_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ms_college_biology.yaml | 5 ++ ...global_mmlu_full_ms_college_chemistry.yaml | 5 ++ ...mmlu_full_ms_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ms_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ms_college_medicine.yaml | 5 ++ .../global_mmlu_full_ms_college_physics.yaml | 5 ++ ...global_mmlu_full_ms_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ms_conceptual_physics.yaml | 5 ++ .../ms/global_mmlu_full_ms_econometrics.yaml | 5 ++ ...l_mmlu_full_ms_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ms_elementary_mathematics.yaml | 5 ++ .../ms/global_mmlu_full_ms_formal_logic.yaml | 5 ++ .../ms/global_mmlu_full_ms_global_facts.yaml | 5 ++ ...obal_mmlu_full_ms_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ms_high_school_chemistry.yaml | 5 ++ ..._full_ms_high_school_computer_science.yaml | 5 ++ ..._full_ms_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ms_high_school_geography.yaml | 5 ++ 
...s_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ms_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ms_high_school_mathematics.yaml | 5 ++ ...lu_full_ms_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ms_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ms_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ms_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ms_high_school_us_history.yaml | 5 ++ ...mlu_full_ms_high_school_world_history.yaml | 5 ++ .../ms/global_mmlu_full_ms_human_aging.yaml | 5 ++ .../global_mmlu_full_ms_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ms_international_law.yaml | 5 ++ .../ms/global_mmlu_full_ms_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ms_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ms_machine_learning.yaml | 5 ++ .../ms/global_mmlu_full_ms_management.yaml | 5 ++ .../ms/global_mmlu_full_ms_marketing.yaml | 5 ++ .../global_mmlu_full_ms_medical_genetics.yaml | 5 ++ .../ms/global_mmlu_full_ms_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ms_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ms_moral_scenarios.yaml | 5 ++ .../ms/global_mmlu_full_ms_nutrition.yaml | 5 ++ .../ms/global_mmlu_full_ms_philosophy.yaml | 5 ++ .../ms/global_mmlu_full_ms_prehistory.yaml | 5 ++ ..._mmlu_full_ms_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ms_professional_law.yaml | 5 ++ ...al_mmlu_full_ms_professional_medicine.yaml | 5 ++ ..._mmlu_full_ms_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ms_public_relations.yaml | 5 ++ .../global_mmlu_full_ms_security_studies.yaml | 5 ++ .../ms/global_mmlu_full_ms_sociology.yaml | 5 ++ ...global_mmlu_full_ms_us_foreign_policy.yaml | 5 ++ .../full/ms/global_mmlu_full_ms_virology.yaml | 5 ++ .../global_mmlu_full_ms_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ms/utils.py | 73 +++++++++++++++++++ .../full/ne/_global_mmlu_full_ne.yaml | 11 +++ .../ne/_global_mmlu_full_ne_humanities.yaml | 8 ++ .../full/ne/_global_mmlu_full_ne_other.yaml | 8 ++ 
.../_global_mmlu_full_ne_social_sciences.yaml | 8 ++ .../full/ne/_global_mmlu_full_ne_stem.yaml | 8 ++ .../global_mmlu/full/ne/_ne_template_yaml | 16 ++++ .../global_mmlu_full_ne_abstract_algebra.yaml | 5 ++ .../full/ne/global_mmlu_full_ne_anatomy.yaml | 5 ++ .../ne/global_mmlu_full_ne_astronomy.yaml | 5 ++ .../global_mmlu_full_ne_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ne_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ne_college_biology.yaml | 5 ++ ...global_mmlu_full_ne_college_chemistry.yaml | 5 ++ ...mmlu_full_ne_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ne_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ne_college_medicine.yaml | 5 ++ .../global_mmlu_full_ne_college_physics.yaml | 5 ++ ...global_mmlu_full_ne_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ne_conceptual_physics.yaml | 5 ++ .../ne/global_mmlu_full_ne_econometrics.yaml | 5 ++ ...l_mmlu_full_ne_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ne_elementary_mathematics.yaml | 5 ++ .../ne/global_mmlu_full_ne_formal_logic.yaml | 5 ++ .../ne/global_mmlu_full_ne_global_facts.yaml | 5 ++ ...obal_mmlu_full_ne_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ne_high_school_chemistry.yaml | 5 ++ ..._full_ne_high_school_computer_science.yaml | 5 ++ ..._full_ne_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ne_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ne_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ne_high_school_mathematics.yaml | 5 ++ ...lu_full_ne_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ne_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ne_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ne_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ne_high_school_us_history.yaml | 5 ++ ...mlu_full_ne_high_school_world_history.yaml | 5 ++ .../ne/global_mmlu_full_ne_human_aging.yaml | 5 ++ .../global_mmlu_full_ne_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ne_international_law.yaml | 5 ++ 
.../ne/global_mmlu_full_ne_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ne_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ne_machine_learning.yaml | 5 ++ .../ne/global_mmlu_full_ne_management.yaml | 5 ++ .../ne/global_mmlu_full_ne_marketing.yaml | 5 ++ .../global_mmlu_full_ne_medical_genetics.yaml | 5 ++ .../ne/global_mmlu_full_ne_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ne_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ne_moral_scenarios.yaml | 5 ++ .../ne/global_mmlu_full_ne_nutrition.yaml | 5 ++ .../ne/global_mmlu_full_ne_philosophy.yaml | 5 ++ .../ne/global_mmlu_full_ne_prehistory.yaml | 5 ++ ..._mmlu_full_ne_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ne_professional_law.yaml | 5 ++ ...al_mmlu_full_ne_professional_medicine.yaml | 5 ++ ..._mmlu_full_ne_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ne_public_relations.yaml | 5 ++ .../global_mmlu_full_ne_security_studies.yaml | 5 ++ .../ne/global_mmlu_full_ne_sociology.yaml | 5 ++ ...global_mmlu_full_ne_us_foreign_policy.yaml | 5 ++ .../full/ne/global_mmlu_full_ne_virology.yaml | 5 ++ .../global_mmlu_full_ne_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ne/utils.py | 73 +++++++++++++++++++ .../full/nl/_global_mmlu_full_nl.yaml | 11 +++ .../nl/_global_mmlu_full_nl_humanities.yaml | 8 ++ .../full/nl/_global_mmlu_full_nl_other.yaml | 8 ++ .../_global_mmlu_full_nl_social_sciences.yaml | 8 ++ .../full/nl/_global_mmlu_full_nl_stem.yaml | 8 ++ .../global_mmlu/full/nl/_nl_template_yaml | 16 ++++ .../global_mmlu_full_nl_abstract_algebra.yaml | 5 ++ .../full/nl/global_mmlu_full_nl_anatomy.yaml | 5 ++ .../nl/global_mmlu_full_nl_astronomy.yaml | 5 ++ .../global_mmlu_full_nl_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_nl_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_nl_college_biology.yaml | 5 ++ ...global_mmlu_full_nl_college_chemistry.yaml | 5 ++ ...mmlu_full_nl_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_nl_college_mathematics.yaml | 5 ++ 
.../global_mmlu_full_nl_college_medicine.yaml | 5 ++ .../global_mmlu_full_nl_college_physics.yaml | 5 ++ ...global_mmlu_full_nl_computer_security.yaml | 5 ++ ...lobal_mmlu_full_nl_conceptual_physics.yaml | 5 ++ .../nl/global_mmlu_full_nl_econometrics.yaml | 5 ++ ...l_mmlu_full_nl_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_nl_elementary_mathematics.yaml | 5 ++ .../nl/global_mmlu_full_nl_formal_logic.yaml | 5 ++ .../nl/global_mmlu_full_nl_global_facts.yaml | 5 ++ ...obal_mmlu_full_nl_high_school_biology.yaml | 5 ++ ...al_mmlu_full_nl_high_school_chemistry.yaml | 5 ++ ..._full_nl_high_school_computer_science.yaml | 5 ++ ..._full_nl_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_nl_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_nl_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_nl_high_school_mathematics.yaml | 5 ++ ...lu_full_nl_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_nl_high_school_physics.yaml | 5 ++ ...l_mmlu_full_nl_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_nl_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_nl_high_school_us_history.yaml | 5 ++ ...mlu_full_nl_high_school_world_history.yaml | 5 ++ .../nl/global_mmlu_full_nl_human_aging.yaml | 5 ++ .../global_mmlu_full_nl_human_sexuality.yaml | 5 ++ ...global_mmlu_full_nl_international_law.yaml | 5 ++ .../nl/global_mmlu_full_nl_jurisprudence.yaml | 5 ++ ...global_mmlu_full_nl_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_nl_machine_learning.yaml | 5 ++ .../nl/global_mmlu_full_nl_management.yaml | 5 ++ .../nl/global_mmlu_full_nl_marketing.yaml | 5 ++ .../global_mmlu_full_nl_medical_genetics.yaml | 5 ++ .../nl/global_mmlu_full_nl_miscellaneous.yaml | 5 ++ .../global_mmlu_full_nl_moral_disputes.yaml | 5 ++ .../global_mmlu_full_nl_moral_scenarios.yaml | 5 ++ .../nl/global_mmlu_full_nl_nutrition.yaml | 5 ++ .../nl/global_mmlu_full_nl_philosophy.yaml | 5 ++ .../nl/global_mmlu_full_nl_prehistory.yaml | 5 ++ 
..._mmlu_full_nl_professional_accounting.yaml | 5 ++ .../global_mmlu_full_nl_professional_law.yaml | 5 ++ ...al_mmlu_full_nl_professional_medicine.yaml | 5 ++ ..._mmlu_full_nl_professional_psychology.yaml | 5 ++ .../global_mmlu_full_nl_public_relations.yaml | 5 ++ .../global_mmlu_full_nl_security_studies.yaml | 5 ++ .../nl/global_mmlu_full_nl_sociology.yaml | 5 ++ ...global_mmlu_full_nl_us_foreign_policy.yaml | 5 ++ .../full/nl/global_mmlu_full_nl_virology.yaml | 5 ++ .../global_mmlu_full_nl_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/nl/utils.py | 73 +++++++++++++++++++ .../full/ny/_global_mmlu_full_ny.yaml | 11 +++ .../ny/_global_mmlu_full_ny_humanities.yaml | 8 ++ .../full/ny/_global_mmlu_full_ny_other.yaml | 8 ++ .../_global_mmlu_full_ny_social_sciences.yaml | 8 ++ .../full/ny/_global_mmlu_full_ny_stem.yaml | 8 ++ .../global_mmlu/full/ny/_ny_template_yaml | 16 ++++ .../global_mmlu_full_ny_abstract_algebra.yaml | 5 ++ .../full/ny/global_mmlu_full_ny_anatomy.yaml | 5 ++ .../ny/global_mmlu_full_ny_astronomy.yaml | 5 ++ .../global_mmlu_full_ny_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ny_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ny_college_biology.yaml | 5 ++ ...global_mmlu_full_ny_college_chemistry.yaml | 5 ++ ...mmlu_full_ny_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ny_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ny_college_medicine.yaml | 5 ++ .../global_mmlu_full_ny_college_physics.yaml | 5 ++ ...global_mmlu_full_ny_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ny_conceptual_physics.yaml | 5 ++ .../ny/global_mmlu_full_ny_econometrics.yaml | 5 ++ ...l_mmlu_full_ny_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ny_elementary_mathematics.yaml | 5 ++ .../ny/global_mmlu_full_ny_formal_logic.yaml | 5 ++ .../ny/global_mmlu_full_ny_global_facts.yaml | 5 ++ ...obal_mmlu_full_ny_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ny_high_school_chemistry.yaml | 5 ++ ..._full_ny_high_school_computer_science.yaml | 5 ++ 
..._full_ny_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ny_high_school_geography.yaml | 5 ++ ...y_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ny_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ny_high_school_mathematics.yaml | 5 ++ ...lu_full_ny_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ny_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ny_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ny_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ny_high_school_us_history.yaml | 5 ++ ...mlu_full_ny_high_school_world_history.yaml | 5 ++ .../ny/global_mmlu_full_ny_human_aging.yaml | 5 ++ .../global_mmlu_full_ny_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ny_international_law.yaml | 5 ++ .../ny/global_mmlu_full_ny_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ny_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ny_machine_learning.yaml | 5 ++ .../ny/global_mmlu_full_ny_management.yaml | 5 ++ .../ny/global_mmlu_full_ny_marketing.yaml | 5 ++ .../global_mmlu_full_ny_medical_genetics.yaml | 5 ++ .../ny/global_mmlu_full_ny_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ny_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ny_moral_scenarios.yaml | 5 ++ .../ny/global_mmlu_full_ny_nutrition.yaml | 5 ++ .../ny/global_mmlu_full_ny_philosophy.yaml | 5 ++ .../ny/global_mmlu_full_ny_prehistory.yaml | 5 ++ ..._mmlu_full_ny_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ny_professional_law.yaml | 5 ++ ...al_mmlu_full_ny_professional_medicine.yaml | 5 ++ ..._mmlu_full_ny_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ny_public_relations.yaml | 5 ++ .../global_mmlu_full_ny_security_studies.yaml | 5 ++ .../ny/global_mmlu_full_ny_sociology.yaml | 5 ++ ...global_mmlu_full_ny_us_foreign_policy.yaml | 5 ++ .../full/ny/global_mmlu_full_ny_virology.yaml | 5 ++ .../global_mmlu_full_ny_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ny/utils.py | 73 +++++++++++++++++++ .../full/pl/_global_mmlu_full_pl.yaml | 11 +++ 
.../pl/_global_mmlu_full_pl_humanities.yaml | 8 ++ .../full/pl/_global_mmlu_full_pl_other.yaml | 8 ++ .../_global_mmlu_full_pl_social_sciences.yaml | 8 ++ .../full/pl/_global_mmlu_full_pl_stem.yaml | 8 ++ .../global_mmlu/full/pl/_pl_template_yaml | 16 ++++ .../global_mmlu_full_pl_abstract_algebra.yaml | 5 ++ .../full/pl/global_mmlu_full_pl_anatomy.yaml | 5 ++ .../pl/global_mmlu_full_pl_astronomy.yaml | 5 ++ .../global_mmlu_full_pl_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_pl_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_pl_college_biology.yaml | 5 ++ ...global_mmlu_full_pl_college_chemistry.yaml | 5 ++ ...mmlu_full_pl_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_pl_college_mathematics.yaml | 5 ++ .../global_mmlu_full_pl_college_medicine.yaml | 5 ++ .../global_mmlu_full_pl_college_physics.yaml | 5 ++ ...global_mmlu_full_pl_computer_security.yaml | 5 ++ ...lobal_mmlu_full_pl_conceptual_physics.yaml | 5 ++ .../pl/global_mmlu_full_pl_econometrics.yaml | 5 ++ ...l_mmlu_full_pl_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_pl_elementary_mathematics.yaml | 5 ++ .../pl/global_mmlu_full_pl_formal_logic.yaml | 5 ++ .../pl/global_mmlu_full_pl_global_facts.yaml | 5 ++ ...obal_mmlu_full_pl_high_school_biology.yaml | 5 ++ ...al_mmlu_full_pl_high_school_chemistry.yaml | 5 ++ ..._full_pl_high_school_computer_science.yaml | 5 ++ ..._full_pl_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_pl_high_school_geography.yaml | 5 ++ ...l_high_school_government_and_politics.yaml | 5 ++ ...lu_full_pl_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_pl_high_school_mathematics.yaml | 5 ++ ...lu_full_pl_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_pl_high_school_physics.yaml | 5 ++ ...l_mmlu_full_pl_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_pl_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_pl_high_school_us_history.yaml | 5 ++ ...mlu_full_pl_high_school_world_history.yaml | 5 ++ .../pl/global_mmlu_full_pl_human_aging.yaml | 5 ++ 
.../global_mmlu_full_pl_human_sexuality.yaml | 5 ++ ...global_mmlu_full_pl_international_law.yaml | 5 ++ .../pl/global_mmlu_full_pl_jurisprudence.yaml | 5 ++ ...global_mmlu_full_pl_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_pl_machine_learning.yaml | 5 ++ .../pl/global_mmlu_full_pl_management.yaml | 5 ++ .../pl/global_mmlu_full_pl_marketing.yaml | 5 ++ .../global_mmlu_full_pl_medical_genetics.yaml | 5 ++ .../pl/global_mmlu_full_pl_miscellaneous.yaml | 5 ++ .../global_mmlu_full_pl_moral_disputes.yaml | 5 ++ .../global_mmlu_full_pl_moral_scenarios.yaml | 5 ++ .../pl/global_mmlu_full_pl_nutrition.yaml | 5 ++ .../pl/global_mmlu_full_pl_philosophy.yaml | 5 ++ .../pl/global_mmlu_full_pl_prehistory.yaml | 5 ++ ..._mmlu_full_pl_professional_accounting.yaml | 5 ++ .../global_mmlu_full_pl_professional_law.yaml | 5 ++ ...al_mmlu_full_pl_professional_medicine.yaml | 5 ++ ..._mmlu_full_pl_professional_psychology.yaml | 5 ++ .../global_mmlu_full_pl_public_relations.yaml | 5 ++ .../global_mmlu_full_pl_security_studies.yaml | 5 ++ .../pl/global_mmlu_full_pl_sociology.yaml | 5 ++ ...global_mmlu_full_pl_us_foreign_policy.yaml | 5 ++ .../full/pl/global_mmlu_full_pl_virology.yaml | 5 ++ .../global_mmlu_full_pl_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/pl/utils.py | 73 +++++++++++++++++++ .../full/pt/_global_mmlu_full_pt.yaml | 11 +++ .../pt/_global_mmlu_full_pt_humanities.yaml | 8 ++ .../full/pt/_global_mmlu_full_pt_other.yaml | 8 ++ .../_global_mmlu_full_pt_social_sciences.yaml | 8 ++ .../full/pt/_global_mmlu_full_pt_stem.yaml | 8 ++ .../global_mmlu/full/pt/_pt_template_yaml | 16 ++++ .../global_mmlu_full_pt_abstract_algebra.yaml | 5 ++ .../full/pt/global_mmlu_full_pt_anatomy.yaml | 5 ++ .../pt/global_mmlu_full_pt_astronomy.yaml | 5 ++ .../global_mmlu_full_pt_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_pt_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_pt_college_biology.yaml | 5 ++ ...global_mmlu_full_pt_college_chemistry.yaml | 5 ++ 
...mmlu_full_pt_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_pt_college_mathematics.yaml | 5 ++ .../global_mmlu_full_pt_college_medicine.yaml | 5 ++ .../global_mmlu_full_pt_college_physics.yaml | 5 ++ ...global_mmlu_full_pt_computer_security.yaml | 5 ++ ...lobal_mmlu_full_pt_conceptual_physics.yaml | 5 ++ .../pt/global_mmlu_full_pt_econometrics.yaml | 5 ++ ...l_mmlu_full_pt_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_pt_elementary_mathematics.yaml | 5 ++ .../pt/global_mmlu_full_pt_formal_logic.yaml | 5 ++ .../pt/global_mmlu_full_pt_global_facts.yaml | 5 ++ ...obal_mmlu_full_pt_high_school_biology.yaml | 5 ++ ...al_mmlu_full_pt_high_school_chemistry.yaml | 5 ++ ..._full_pt_high_school_computer_science.yaml | 5 ++ ..._full_pt_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_pt_high_school_geography.yaml | 5 ++ ...t_high_school_government_and_politics.yaml | 5 ++ ...lu_full_pt_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_pt_high_school_mathematics.yaml | 5 ++ ...lu_full_pt_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_pt_high_school_physics.yaml | 5 ++ ...l_mmlu_full_pt_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_pt_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_pt_high_school_us_history.yaml | 5 ++ ...mlu_full_pt_high_school_world_history.yaml | 5 ++ .../pt/global_mmlu_full_pt_human_aging.yaml | 5 ++ .../global_mmlu_full_pt_human_sexuality.yaml | 5 ++ ...global_mmlu_full_pt_international_law.yaml | 5 ++ .../pt/global_mmlu_full_pt_jurisprudence.yaml | 5 ++ ...global_mmlu_full_pt_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_pt_machine_learning.yaml | 5 ++ .../pt/global_mmlu_full_pt_management.yaml | 5 ++ .../pt/global_mmlu_full_pt_marketing.yaml | 5 ++ .../global_mmlu_full_pt_medical_genetics.yaml | 5 ++ .../pt/global_mmlu_full_pt_miscellaneous.yaml | 5 ++ .../global_mmlu_full_pt_moral_disputes.yaml | 5 ++ .../global_mmlu_full_pt_moral_scenarios.yaml | 5 ++ .../pt/global_mmlu_full_pt_nutrition.yaml | 5 ++ 
.../pt/global_mmlu_full_pt_philosophy.yaml | 5 ++ .../pt/global_mmlu_full_pt_prehistory.yaml | 5 ++ ..._mmlu_full_pt_professional_accounting.yaml | 5 ++ .../global_mmlu_full_pt_professional_law.yaml | 5 ++ ...al_mmlu_full_pt_professional_medicine.yaml | 5 ++ ..._mmlu_full_pt_professional_psychology.yaml | 5 ++ .../global_mmlu_full_pt_public_relations.yaml | 5 ++ .../global_mmlu_full_pt_security_studies.yaml | 5 ++ .../pt/global_mmlu_full_pt_sociology.yaml | 5 ++ ...global_mmlu_full_pt_us_foreign_policy.yaml | 5 ++ .../full/pt/global_mmlu_full_pt_virology.yaml | 5 ++ .../global_mmlu_full_pt_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/pt/utils.py | 73 +++++++++++++++++++ .../full/ro/_global_mmlu_full_ro.yaml | 11 +++ .../ro/_global_mmlu_full_ro_humanities.yaml | 8 ++ .../full/ro/_global_mmlu_full_ro_other.yaml | 8 ++ .../_global_mmlu_full_ro_social_sciences.yaml | 8 ++ .../full/ro/_global_mmlu_full_ro_stem.yaml | 8 ++ .../global_mmlu/full/ro/_ro_template_yaml | 16 ++++ .../global_mmlu_full_ro_abstract_algebra.yaml | 5 ++ .../full/ro/global_mmlu_full_ro_anatomy.yaml | 5 ++ .../ro/global_mmlu_full_ro_astronomy.yaml | 5 ++ .../global_mmlu_full_ro_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ro_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ro_college_biology.yaml | 5 ++ ...global_mmlu_full_ro_college_chemistry.yaml | 5 ++ ...mmlu_full_ro_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ro_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ro_college_medicine.yaml | 5 ++ .../global_mmlu_full_ro_college_physics.yaml | 5 ++ ...global_mmlu_full_ro_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ro_conceptual_physics.yaml | 5 ++ .../ro/global_mmlu_full_ro_econometrics.yaml | 5 ++ ...l_mmlu_full_ro_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ro_elementary_mathematics.yaml | 5 ++ .../ro/global_mmlu_full_ro_formal_logic.yaml | 5 ++ .../ro/global_mmlu_full_ro_global_facts.yaml | 5 ++ ...obal_mmlu_full_ro_high_school_biology.yaml | 5 ++ 
...al_mmlu_full_ro_high_school_chemistry.yaml | 5 ++ ..._full_ro_high_school_computer_science.yaml | 5 ++ ..._full_ro_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ro_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ro_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ro_high_school_mathematics.yaml | 5 ++ ...lu_full_ro_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ro_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ro_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ro_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ro_high_school_us_history.yaml | 5 ++ ...mlu_full_ro_high_school_world_history.yaml | 5 ++ .../ro/global_mmlu_full_ro_human_aging.yaml | 5 ++ .../global_mmlu_full_ro_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ro_international_law.yaml | 5 ++ .../ro/global_mmlu_full_ro_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ro_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ro_machine_learning.yaml | 5 ++ .../ro/global_mmlu_full_ro_management.yaml | 5 ++ .../ro/global_mmlu_full_ro_marketing.yaml | 5 ++ .../global_mmlu_full_ro_medical_genetics.yaml | 5 ++ .../ro/global_mmlu_full_ro_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ro_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ro_moral_scenarios.yaml | 5 ++ .../ro/global_mmlu_full_ro_nutrition.yaml | 5 ++ .../ro/global_mmlu_full_ro_philosophy.yaml | 5 ++ .../ro/global_mmlu_full_ro_prehistory.yaml | 5 ++ ..._mmlu_full_ro_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ro_professional_law.yaml | 5 ++ ...al_mmlu_full_ro_professional_medicine.yaml | 5 ++ ..._mmlu_full_ro_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ro_public_relations.yaml | 5 ++ .../global_mmlu_full_ro_security_studies.yaml | 5 ++ .../ro/global_mmlu_full_ro_sociology.yaml | 5 ++ ...global_mmlu_full_ro_us_foreign_policy.yaml | 5 ++ .../full/ro/global_mmlu_full_ro_virology.yaml | 5 ++ .../global_mmlu_full_ro_world_religions.yaml | 5 ++ 
lm_eval/tasks/global_mmlu/full/ro/utils.py | 73 +++++++++++++++++++ .../full/ru/_global_mmlu_full_ru.yaml | 11 +++ .../ru/_global_mmlu_full_ru_humanities.yaml | 8 ++ .../full/ru/_global_mmlu_full_ru_other.yaml | 8 ++ .../_global_mmlu_full_ru_social_sciences.yaml | 8 ++ .../full/ru/_global_mmlu_full_ru_stem.yaml | 8 ++ .../global_mmlu/full/ru/_ru_template_yaml | 16 ++++ .../global_mmlu_full_ru_abstract_algebra.yaml | 5 ++ .../full/ru/global_mmlu_full_ru_anatomy.yaml | 5 ++ .../ru/global_mmlu_full_ru_astronomy.yaml | 5 ++ .../global_mmlu_full_ru_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_ru_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_ru_college_biology.yaml | 5 ++ ...global_mmlu_full_ru_college_chemistry.yaml | 5 ++ ...mmlu_full_ru_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_ru_college_mathematics.yaml | 5 ++ .../global_mmlu_full_ru_college_medicine.yaml | 5 ++ .../global_mmlu_full_ru_college_physics.yaml | 5 ++ ...global_mmlu_full_ru_computer_security.yaml | 5 ++ ...lobal_mmlu_full_ru_conceptual_physics.yaml | 5 ++ .../ru/global_mmlu_full_ru_econometrics.yaml | 5 ++ ...l_mmlu_full_ru_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_ru_elementary_mathematics.yaml | 5 ++ .../ru/global_mmlu_full_ru_formal_logic.yaml | 5 ++ .../ru/global_mmlu_full_ru_global_facts.yaml | 5 ++ ...obal_mmlu_full_ru_high_school_biology.yaml | 5 ++ ...al_mmlu_full_ru_high_school_chemistry.yaml | 5 ++ ..._full_ru_high_school_computer_science.yaml | 5 ++ ..._full_ru_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_ru_high_school_geography.yaml | 5 ++ ...u_high_school_government_and_politics.yaml | 5 ++ ...lu_full_ru_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_ru_high_school_mathematics.yaml | 5 ++ ...lu_full_ru_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_ru_high_school_physics.yaml | 5 ++ ...l_mmlu_full_ru_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_ru_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_ru_high_school_us_history.yaml | 
5 ++ ...mlu_full_ru_high_school_world_history.yaml | 5 ++ .../ru/global_mmlu_full_ru_human_aging.yaml | 5 ++ .../global_mmlu_full_ru_human_sexuality.yaml | 5 ++ ...global_mmlu_full_ru_international_law.yaml | 5 ++ .../ru/global_mmlu_full_ru_jurisprudence.yaml | 5 ++ ...global_mmlu_full_ru_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_ru_machine_learning.yaml | 5 ++ .../ru/global_mmlu_full_ru_management.yaml | 5 ++ .../ru/global_mmlu_full_ru_marketing.yaml | 5 ++ .../global_mmlu_full_ru_medical_genetics.yaml | 5 ++ .../ru/global_mmlu_full_ru_miscellaneous.yaml | 5 ++ .../global_mmlu_full_ru_moral_disputes.yaml | 5 ++ .../global_mmlu_full_ru_moral_scenarios.yaml | 5 ++ .../ru/global_mmlu_full_ru_nutrition.yaml | 5 ++ .../ru/global_mmlu_full_ru_philosophy.yaml | 5 ++ .../ru/global_mmlu_full_ru_prehistory.yaml | 5 ++ ..._mmlu_full_ru_professional_accounting.yaml | 5 ++ .../global_mmlu_full_ru_professional_law.yaml | 5 ++ ...al_mmlu_full_ru_professional_medicine.yaml | 5 ++ ..._mmlu_full_ru_professional_psychology.yaml | 5 ++ .../global_mmlu_full_ru_public_relations.yaml | 5 ++ .../global_mmlu_full_ru_security_studies.yaml | 5 ++ .../ru/global_mmlu_full_ru_sociology.yaml | 5 ++ ...global_mmlu_full_ru_us_foreign_policy.yaml | 5 ++ .../full/ru/global_mmlu_full_ru_virology.yaml | 5 ++ .../global_mmlu_full_ru_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/ru/utils.py | 73 +++++++++++++++++++ .../full/si/_global_mmlu_full_si.yaml | 11 +++ .../si/_global_mmlu_full_si_humanities.yaml | 8 ++ .../full/si/_global_mmlu_full_si_other.yaml | 8 ++ .../_global_mmlu_full_si_social_sciences.yaml | 8 ++ .../full/si/_global_mmlu_full_si_stem.yaml | 8 ++ .../global_mmlu/full/si/_si_template_yaml | 16 ++++ .../global_mmlu_full_si_abstract_algebra.yaml | 5 ++ .../full/si/global_mmlu_full_si_anatomy.yaml | 5 ++ .../si/global_mmlu_full_si_astronomy.yaml | 5 ++ .../global_mmlu_full_si_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_si_clinical_knowledge.yaml | 5 ++ 
.../global_mmlu_full_si_college_biology.yaml | 5 ++ ...global_mmlu_full_si_college_chemistry.yaml | 5 ++ ...mmlu_full_si_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_si_college_mathematics.yaml | 5 ++ .../global_mmlu_full_si_college_medicine.yaml | 5 ++ .../global_mmlu_full_si_college_physics.yaml | 5 ++ ...global_mmlu_full_si_computer_security.yaml | 5 ++ ...lobal_mmlu_full_si_conceptual_physics.yaml | 5 ++ .../si/global_mmlu_full_si_econometrics.yaml | 5 ++ ...l_mmlu_full_si_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_si_elementary_mathematics.yaml | 5 ++ .../si/global_mmlu_full_si_formal_logic.yaml | 5 ++ .../si/global_mmlu_full_si_global_facts.yaml | 5 ++ ...obal_mmlu_full_si_high_school_biology.yaml | 5 ++ ...al_mmlu_full_si_high_school_chemistry.yaml | 5 ++ ..._full_si_high_school_computer_science.yaml | 5 ++ ..._full_si_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_si_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_si_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_si_high_school_mathematics.yaml | 5 ++ ...lu_full_si_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_si_high_school_physics.yaml | 5 ++ ...l_mmlu_full_si_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_si_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_si_high_school_us_history.yaml | 5 ++ ...mlu_full_si_high_school_world_history.yaml | 5 ++ .../si/global_mmlu_full_si_human_aging.yaml | 5 ++ .../global_mmlu_full_si_human_sexuality.yaml | 5 ++ ...global_mmlu_full_si_international_law.yaml | 5 ++ .../si/global_mmlu_full_si_jurisprudence.yaml | 5 ++ ...global_mmlu_full_si_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_si_machine_learning.yaml | 5 ++ .../si/global_mmlu_full_si_management.yaml | 5 ++ .../si/global_mmlu_full_si_marketing.yaml | 5 ++ .../global_mmlu_full_si_medical_genetics.yaml | 5 ++ .../si/global_mmlu_full_si_miscellaneous.yaml | 5 ++ .../global_mmlu_full_si_moral_disputes.yaml | 5 ++ 
.../global_mmlu_full_si_moral_scenarios.yaml | 5 ++ .../si/global_mmlu_full_si_nutrition.yaml | 5 ++ .../si/global_mmlu_full_si_philosophy.yaml | 5 ++ .../si/global_mmlu_full_si_prehistory.yaml | 5 ++ ..._mmlu_full_si_professional_accounting.yaml | 5 ++ .../global_mmlu_full_si_professional_law.yaml | 5 ++ ...al_mmlu_full_si_professional_medicine.yaml | 5 ++ ..._mmlu_full_si_professional_psychology.yaml | 5 ++ .../global_mmlu_full_si_public_relations.yaml | 5 ++ .../global_mmlu_full_si_security_studies.yaml | 5 ++ .../si/global_mmlu_full_si_sociology.yaml | 5 ++ ...global_mmlu_full_si_us_foreign_policy.yaml | 5 ++ .../full/si/global_mmlu_full_si_virology.yaml | 5 ++ .../global_mmlu_full_si_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/si/utils.py | 73 +++++++++++++++++++ .../full/sn/_global_mmlu_full_sn.yaml | 11 +++ .../sn/_global_mmlu_full_sn_humanities.yaml | 8 ++ .../full/sn/_global_mmlu_full_sn_other.yaml | 8 ++ .../_global_mmlu_full_sn_social_sciences.yaml | 8 ++ .../full/sn/_global_mmlu_full_sn_stem.yaml | 8 ++ .../global_mmlu/full/sn/_sn_template_yaml | 16 ++++ .../global_mmlu_full_sn_abstract_algebra.yaml | 5 ++ .../full/sn/global_mmlu_full_sn_anatomy.yaml | 5 ++ .../sn/global_mmlu_full_sn_astronomy.yaml | 5 ++ .../global_mmlu_full_sn_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sn_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sn_college_biology.yaml | 5 ++ ...global_mmlu_full_sn_college_chemistry.yaml | 5 ++ ...mmlu_full_sn_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sn_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sn_college_medicine.yaml | 5 ++ .../global_mmlu_full_sn_college_physics.yaml | 5 ++ ...global_mmlu_full_sn_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sn_conceptual_physics.yaml | 5 ++ .../sn/global_mmlu_full_sn_econometrics.yaml | 5 ++ ...l_mmlu_full_sn_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sn_elementary_mathematics.yaml | 5 ++ .../sn/global_mmlu_full_sn_formal_logic.yaml | 5 ++ 
.../sn/global_mmlu_full_sn_global_facts.yaml | 5 ++ ...obal_mmlu_full_sn_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sn_high_school_chemistry.yaml | 5 ++ ..._full_sn_high_school_computer_science.yaml | 5 ++ ..._full_sn_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sn_high_school_geography.yaml | 5 ++ ...n_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sn_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sn_high_school_mathematics.yaml | 5 ++ ...lu_full_sn_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sn_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sn_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sn_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sn_high_school_us_history.yaml | 5 ++ ...mlu_full_sn_high_school_world_history.yaml | 5 ++ .../sn/global_mmlu_full_sn_human_aging.yaml | 5 ++ .../global_mmlu_full_sn_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sn_international_law.yaml | 5 ++ .../sn/global_mmlu_full_sn_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sn_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sn_machine_learning.yaml | 5 ++ .../sn/global_mmlu_full_sn_management.yaml | 5 ++ .../sn/global_mmlu_full_sn_marketing.yaml | 5 ++ .../global_mmlu_full_sn_medical_genetics.yaml | 5 ++ .../sn/global_mmlu_full_sn_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sn_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sn_moral_scenarios.yaml | 5 ++ .../sn/global_mmlu_full_sn_nutrition.yaml | 5 ++ .../sn/global_mmlu_full_sn_philosophy.yaml | 5 ++ .../sn/global_mmlu_full_sn_prehistory.yaml | 5 ++ ..._mmlu_full_sn_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sn_professional_law.yaml | 5 ++ ...al_mmlu_full_sn_professional_medicine.yaml | 5 ++ ..._mmlu_full_sn_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sn_public_relations.yaml | 5 ++ .../global_mmlu_full_sn_security_studies.yaml | 5 ++ .../sn/global_mmlu_full_sn_sociology.yaml | 5 ++ ...global_mmlu_full_sn_us_foreign_policy.yaml | 5 ++ 
.../full/sn/global_mmlu_full_sn_virology.yaml | 5 ++ .../global_mmlu_full_sn_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sn/utils.py | 73 +++++++++++++++++++ .../full/so/_global_mmlu_full_so.yaml | 11 +++ .../so/_global_mmlu_full_so_humanities.yaml | 8 ++ .../full/so/_global_mmlu_full_so_other.yaml | 8 ++ .../_global_mmlu_full_so_social_sciences.yaml | 8 ++ .../full/so/_global_mmlu_full_so_stem.yaml | 8 ++ .../global_mmlu/full/so/_so_template_yaml | 16 ++++ .../global_mmlu_full_so_abstract_algebra.yaml | 5 ++ .../full/so/global_mmlu_full_so_anatomy.yaml | 5 ++ .../so/global_mmlu_full_so_astronomy.yaml | 5 ++ .../global_mmlu_full_so_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_so_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_so_college_biology.yaml | 5 ++ ...global_mmlu_full_so_college_chemistry.yaml | 5 ++ ...mmlu_full_so_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_so_college_mathematics.yaml | 5 ++ .../global_mmlu_full_so_college_medicine.yaml | 5 ++ .../global_mmlu_full_so_college_physics.yaml | 5 ++ ...global_mmlu_full_so_computer_security.yaml | 5 ++ ...lobal_mmlu_full_so_conceptual_physics.yaml | 5 ++ .../so/global_mmlu_full_so_econometrics.yaml | 5 ++ ...l_mmlu_full_so_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_so_elementary_mathematics.yaml | 5 ++ .../so/global_mmlu_full_so_formal_logic.yaml | 5 ++ .../so/global_mmlu_full_so_global_facts.yaml | 5 ++ ...obal_mmlu_full_so_high_school_biology.yaml | 5 ++ ...al_mmlu_full_so_high_school_chemistry.yaml | 5 ++ ..._full_so_high_school_computer_science.yaml | 5 ++ ..._full_so_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_so_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_so_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_so_high_school_mathematics.yaml | 5 ++ ...lu_full_so_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_so_high_school_physics.yaml | 5 ++ ...l_mmlu_full_so_high_school_psychology.yaml | 5 
++ ...l_mmlu_full_so_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_so_high_school_us_history.yaml | 5 ++ ...mlu_full_so_high_school_world_history.yaml | 5 ++ .../so/global_mmlu_full_so_human_aging.yaml | 5 ++ .../global_mmlu_full_so_human_sexuality.yaml | 5 ++ ...global_mmlu_full_so_international_law.yaml | 5 ++ .../so/global_mmlu_full_so_jurisprudence.yaml | 5 ++ ...global_mmlu_full_so_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_so_machine_learning.yaml | 5 ++ .../so/global_mmlu_full_so_management.yaml | 5 ++ .../so/global_mmlu_full_so_marketing.yaml | 5 ++ .../global_mmlu_full_so_medical_genetics.yaml | 5 ++ .../so/global_mmlu_full_so_miscellaneous.yaml | 5 ++ .../global_mmlu_full_so_moral_disputes.yaml | 5 ++ .../global_mmlu_full_so_moral_scenarios.yaml | 5 ++ .../so/global_mmlu_full_so_nutrition.yaml | 5 ++ .../so/global_mmlu_full_so_philosophy.yaml | 5 ++ .../so/global_mmlu_full_so_prehistory.yaml | 5 ++ ..._mmlu_full_so_professional_accounting.yaml | 5 ++ .../global_mmlu_full_so_professional_law.yaml | 5 ++ ...al_mmlu_full_so_professional_medicine.yaml | 5 ++ ..._mmlu_full_so_professional_psychology.yaml | 5 ++ .../global_mmlu_full_so_public_relations.yaml | 5 ++ .../global_mmlu_full_so_security_studies.yaml | 5 ++ .../so/global_mmlu_full_so_sociology.yaml | 5 ++ ...global_mmlu_full_so_us_foreign_policy.yaml | 5 ++ .../full/so/global_mmlu_full_so_virology.yaml | 5 ++ .../global_mmlu_full_so_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/so/utils.py | 73 +++++++++++++++++++ .../full/sr/_global_mmlu_full_sr.yaml | 11 +++ .../sr/_global_mmlu_full_sr_humanities.yaml | 8 ++ .../full/sr/_global_mmlu_full_sr_other.yaml | 8 ++ .../_global_mmlu_full_sr_social_sciences.yaml | 8 ++ .../full/sr/_global_mmlu_full_sr_stem.yaml | 8 ++ .../global_mmlu/full/sr/_sr_template_yaml | 16 ++++ .../global_mmlu_full_sr_abstract_algebra.yaml | 5 ++ .../full/sr/global_mmlu_full_sr_anatomy.yaml | 5 ++ .../sr/global_mmlu_full_sr_astronomy.yaml | 5 ++ 
.../global_mmlu_full_sr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sr_college_biology.yaml | 5 ++ ...global_mmlu_full_sr_college_chemistry.yaml | 5 ++ ...mmlu_full_sr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sr_college_medicine.yaml | 5 ++ .../global_mmlu_full_sr_college_physics.yaml | 5 ++ ...global_mmlu_full_sr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sr_conceptual_physics.yaml | 5 ++ .../sr/global_mmlu_full_sr_econometrics.yaml | 5 ++ ...l_mmlu_full_sr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sr_elementary_mathematics.yaml | 5 ++ .../sr/global_mmlu_full_sr_formal_logic.yaml | 5 ++ .../sr/global_mmlu_full_sr_global_facts.yaml | 5 ++ ...obal_mmlu_full_sr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sr_high_school_chemistry.yaml | 5 ++ ..._full_sr_high_school_computer_science.yaml | 5 ++ ..._full_sr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sr_high_school_mathematics.yaml | 5 ++ ...lu_full_sr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sr_high_school_us_history.yaml | 5 ++ ...mlu_full_sr_high_school_world_history.yaml | 5 ++ .../sr/global_mmlu_full_sr_human_aging.yaml | 5 ++ .../global_mmlu_full_sr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sr_international_law.yaml | 5 ++ .../sr/global_mmlu_full_sr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sr_machine_learning.yaml | 5 ++ .../sr/global_mmlu_full_sr_management.yaml | 5 ++ .../sr/global_mmlu_full_sr_marketing.yaml | 5 ++ .../global_mmlu_full_sr_medical_genetics.yaml | 5 ++ 
.../sr/global_mmlu_full_sr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sr_moral_scenarios.yaml | 5 ++ .../sr/global_mmlu_full_sr_nutrition.yaml | 5 ++ .../sr/global_mmlu_full_sr_philosophy.yaml | 5 ++ .../sr/global_mmlu_full_sr_prehistory.yaml | 5 ++ ..._mmlu_full_sr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sr_professional_law.yaml | 5 ++ ...al_mmlu_full_sr_professional_medicine.yaml | 5 ++ ..._mmlu_full_sr_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sr_public_relations.yaml | 5 ++ .../global_mmlu_full_sr_security_studies.yaml | 5 ++ .../sr/global_mmlu_full_sr_sociology.yaml | 5 ++ ...global_mmlu_full_sr_us_foreign_policy.yaml | 5 ++ .../full/sr/global_mmlu_full_sr_virology.yaml | 5 ++ .../global_mmlu_full_sr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sr/utils.py | 73 +++++++++++++++++++ .../full/sv/_global_mmlu_full_sv.yaml | 11 +++ .../sv/_global_mmlu_full_sv_humanities.yaml | 8 ++ .../full/sv/_global_mmlu_full_sv_other.yaml | 8 ++ .../_global_mmlu_full_sv_social_sciences.yaml | 8 ++ .../full/sv/_global_mmlu_full_sv_stem.yaml | 8 ++ .../global_mmlu/full/sv/_sv_template_yaml | 16 ++++ .../global_mmlu_full_sv_abstract_algebra.yaml | 5 ++ .../full/sv/global_mmlu_full_sv_anatomy.yaml | 5 ++ .../sv/global_mmlu_full_sv_astronomy.yaml | 5 ++ .../global_mmlu_full_sv_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sv_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sv_college_biology.yaml | 5 ++ ...global_mmlu_full_sv_college_chemistry.yaml | 5 ++ ...mmlu_full_sv_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sv_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sv_college_medicine.yaml | 5 ++ .../global_mmlu_full_sv_college_physics.yaml | 5 ++ ...global_mmlu_full_sv_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sv_conceptual_physics.yaml | 5 ++ .../sv/global_mmlu_full_sv_econometrics.yaml | 5 ++ ...l_mmlu_full_sv_electrical_engineering.yaml | 5 ++ 
...l_mmlu_full_sv_elementary_mathematics.yaml | 5 ++ .../sv/global_mmlu_full_sv_formal_logic.yaml | 5 ++ .../sv/global_mmlu_full_sv_global_facts.yaml | 5 ++ ...obal_mmlu_full_sv_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sv_high_school_chemistry.yaml | 5 ++ ..._full_sv_high_school_computer_science.yaml | 5 ++ ..._full_sv_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sv_high_school_geography.yaml | 5 ++ ...v_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sv_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sv_high_school_mathematics.yaml | 5 ++ ...lu_full_sv_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_sv_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sv_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sv_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sv_high_school_us_history.yaml | 5 ++ ...mlu_full_sv_high_school_world_history.yaml | 5 ++ .../sv/global_mmlu_full_sv_human_aging.yaml | 5 ++ .../global_mmlu_full_sv_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sv_international_law.yaml | 5 ++ .../sv/global_mmlu_full_sv_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sv_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sv_machine_learning.yaml | 5 ++ .../sv/global_mmlu_full_sv_management.yaml | 5 ++ .../sv/global_mmlu_full_sv_marketing.yaml | 5 ++ .../global_mmlu_full_sv_medical_genetics.yaml | 5 ++ .../sv/global_mmlu_full_sv_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sv_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sv_moral_scenarios.yaml | 5 ++ .../sv/global_mmlu_full_sv_nutrition.yaml | 5 ++ .../sv/global_mmlu_full_sv_philosophy.yaml | 5 ++ .../sv/global_mmlu_full_sv_prehistory.yaml | 5 ++ ..._mmlu_full_sv_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sv_professional_law.yaml | 5 ++ ...al_mmlu_full_sv_professional_medicine.yaml | 5 ++ ..._mmlu_full_sv_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sv_public_relations.yaml | 5 ++ .../global_mmlu_full_sv_security_studies.yaml | 5 ++ 
.../sv/global_mmlu_full_sv_sociology.yaml | 5 ++ ...global_mmlu_full_sv_us_foreign_policy.yaml | 5 ++ .../full/sv/global_mmlu_full_sv_virology.yaml | 5 ++ .../global_mmlu_full_sv_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sv/utils.py | 73 +++++++++++++++++++ .../full/sw/_global_mmlu_full_sw.yaml | 11 +++ .../sw/_global_mmlu_full_sw_humanities.yaml | 8 ++ .../full/sw/_global_mmlu_full_sw_other.yaml | 8 ++ .../_global_mmlu_full_sw_social_sciences.yaml | 8 ++ .../full/sw/_global_mmlu_full_sw_stem.yaml | 8 ++ .../global_mmlu/full/sw/_sw_template_yaml | 16 ++++ .../global_mmlu_full_sw_abstract_algebra.yaml | 5 ++ .../full/sw/global_mmlu_full_sw_anatomy.yaml | 5 ++ .../sw/global_mmlu_full_sw_astronomy.yaml | 5 ++ .../global_mmlu_full_sw_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_sw_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_sw_college_biology.yaml | 5 ++ ...global_mmlu_full_sw_college_chemistry.yaml | 5 ++ ...mmlu_full_sw_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_sw_college_mathematics.yaml | 5 ++ .../global_mmlu_full_sw_college_medicine.yaml | 5 ++ .../global_mmlu_full_sw_college_physics.yaml | 5 ++ ...global_mmlu_full_sw_computer_security.yaml | 5 ++ ...lobal_mmlu_full_sw_conceptual_physics.yaml | 5 ++ .../sw/global_mmlu_full_sw_econometrics.yaml | 5 ++ ...l_mmlu_full_sw_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_sw_elementary_mathematics.yaml | 5 ++ .../sw/global_mmlu_full_sw_formal_logic.yaml | 5 ++ .../sw/global_mmlu_full_sw_global_facts.yaml | 5 ++ ...obal_mmlu_full_sw_high_school_biology.yaml | 5 ++ ...al_mmlu_full_sw_high_school_chemistry.yaml | 5 ++ ..._full_sw_high_school_computer_science.yaml | 5 ++ ..._full_sw_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_sw_high_school_geography.yaml | 5 ++ ...w_high_school_government_and_politics.yaml | 5 ++ ...lu_full_sw_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_sw_high_school_mathematics.yaml | 5 ++ ...lu_full_sw_high_school_microeconomics.yaml | 5 ++ 
...obal_mmlu_full_sw_high_school_physics.yaml | 5 ++ ...l_mmlu_full_sw_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_sw_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_sw_high_school_us_history.yaml | 5 ++ ...mlu_full_sw_high_school_world_history.yaml | 5 ++ .../sw/global_mmlu_full_sw_human_aging.yaml | 5 ++ .../global_mmlu_full_sw_human_sexuality.yaml | 5 ++ ...global_mmlu_full_sw_international_law.yaml | 5 ++ .../sw/global_mmlu_full_sw_jurisprudence.yaml | 5 ++ ...global_mmlu_full_sw_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_sw_machine_learning.yaml | 5 ++ .../sw/global_mmlu_full_sw_management.yaml | 5 ++ .../sw/global_mmlu_full_sw_marketing.yaml | 5 ++ .../global_mmlu_full_sw_medical_genetics.yaml | 5 ++ .../sw/global_mmlu_full_sw_miscellaneous.yaml | 5 ++ .../global_mmlu_full_sw_moral_disputes.yaml | 5 ++ .../global_mmlu_full_sw_moral_scenarios.yaml | 5 ++ .../sw/global_mmlu_full_sw_nutrition.yaml | 5 ++ .../sw/global_mmlu_full_sw_philosophy.yaml | 5 ++ .../sw/global_mmlu_full_sw_prehistory.yaml | 5 ++ ..._mmlu_full_sw_professional_accounting.yaml | 5 ++ .../global_mmlu_full_sw_professional_law.yaml | 5 ++ ...al_mmlu_full_sw_professional_medicine.yaml | 5 ++ ..._mmlu_full_sw_professional_psychology.yaml | 5 ++ .../global_mmlu_full_sw_public_relations.yaml | 5 ++ .../global_mmlu_full_sw_security_studies.yaml | 5 ++ .../sw/global_mmlu_full_sw_sociology.yaml | 5 ++ ...global_mmlu_full_sw_us_foreign_policy.yaml | 5 ++ .../full/sw/global_mmlu_full_sw_virology.yaml | 5 ++ .../global_mmlu_full_sw_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/sw/utils.py | 73 +++++++++++++++++++ .../full/te/_global_mmlu_full_te.yaml | 11 +++ .../te/_global_mmlu_full_te_humanities.yaml | 8 ++ .../full/te/_global_mmlu_full_te_other.yaml | 8 ++ .../_global_mmlu_full_te_social_sciences.yaml | 8 ++ .../full/te/_global_mmlu_full_te_stem.yaml | 8 ++ .../global_mmlu/full/te/_te_template_yaml | 16 ++++ .../global_mmlu_full_te_abstract_algebra.yaml | 5 ++ 
.../full/te/global_mmlu_full_te_anatomy.yaml | 5 ++ .../te/global_mmlu_full_te_astronomy.yaml | 5 ++ .../global_mmlu_full_te_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_te_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_te_college_biology.yaml | 5 ++ ...global_mmlu_full_te_college_chemistry.yaml | 5 ++ ...mmlu_full_te_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_te_college_mathematics.yaml | 5 ++ .../global_mmlu_full_te_college_medicine.yaml | 5 ++ .../global_mmlu_full_te_college_physics.yaml | 5 ++ ...global_mmlu_full_te_computer_security.yaml | 5 ++ ...lobal_mmlu_full_te_conceptual_physics.yaml | 5 ++ .../te/global_mmlu_full_te_econometrics.yaml | 5 ++ ...l_mmlu_full_te_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_te_elementary_mathematics.yaml | 5 ++ .../te/global_mmlu_full_te_formal_logic.yaml | 5 ++ .../te/global_mmlu_full_te_global_facts.yaml | 5 ++ ...obal_mmlu_full_te_high_school_biology.yaml | 5 ++ ...al_mmlu_full_te_high_school_chemistry.yaml | 5 ++ ..._full_te_high_school_computer_science.yaml | 5 ++ ..._full_te_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_te_high_school_geography.yaml | 5 ++ ...e_high_school_government_and_politics.yaml | 5 ++ ...lu_full_te_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_te_high_school_mathematics.yaml | 5 ++ ...lu_full_te_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_te_high_school_physics.yaml | 5 ++ ...l_mmlu_full_te_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_te_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_te_high_school_us_history.yaml | 5 ++ ...mlu_full_te_high_school_world_history.yaml | 5 ++ .../te/global_mmlu_full_te_human_aging.yaml | 5 ++ .../global_mmlu_full_te_human_sexuality.yaml | 5 ++ ...global_mmlu_full_te_international_law.yaml | 5 ++ .../te/global_mmlu_full_te_jurisprudence.yaml | 5 ++ ...global_mmlu_full_te_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_te_machine_learning.yaml | 5 ++ .../te/global_mmlu_full_te_management.yaml | 5 ++ 
.../te/global_mmlu_full_te_marketing.yaml | 5 ++ .../global_mmlu_full_te_medical_genetics.yaml | 5 ++ .../te/global_mmlu_full_te_miscellaneous.yaml | 5 ++ .../global_mmlu_full_te_moral_disputes.yaml | 5 ++ .../global_mmlu_full_te_moral_scenarios.yaml | 5 ++ .../te/global_mmlu_full_te_nutrition.yaml | 5 ++ .../te/global_mmlu_full_te_philosophy.yaml | 5 ++ .../te/global_mmlu_full_te_prehistory.yaml | 5 ++ ..._mmlu_full_te_professional_accounting.yaml | 5 ++ .../global_mmlu_full_te_professional_law.yaml | 5 ++ ...al_mmlu_full_te_professional_medicine.yaml | 5 ++ ..._mmlu_full_te_professional_psychology.yaml | 5 ++ .../global_mmlu_full_te_public_relations.yaml | 5 ++ .../global_mmlu_full_te_security_studies.yaml | 5 ++ .../te/global_mmlu_full_te_sociology.yaml | 5 ++ ...global_mmlu_full_te_us_foreign_policy.yaml | 5 ++ .../full/te/global_mmlu_full_te_virology.yaml | 5 ++ .../global_mmlu_full_te_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/te/utils.py | 73 +++++++++++++++++++ .../full/tr/_global_mmlu_full_tr.yaml | 11 +++ .../tr/_global_mmlu_full_tr_humanities.yaml | 8 ++ .../full/tr/_global_mmlu_full_tr_other.yaml | 8 ++ .../_global_mmlu_full_tr_social_sciences.yaml | 8 ++ .../full/tr/_global_mmlu_full_tr_stem.yaml | 8 ++ .../global_mmlu/full/tr/_tr_template_yaml | 16 ++++ .../global_mmlu_full_tr_abstract_algebra.yaml | 5 ++ .../full/tr/global_mmlu_full_tr_anatomy.yaml | 5 ++ .../tr/global_mmlu_full_tr_astronomy.yaml | 5 ++ .../global_mmlu_full_tr_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_tr_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_tr_college_biology.yaml | 5 ++ ...global_mmlu_full_tr_college_chemistry.yaml | 5 ++ ...mmlu_full_tr_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_tr_college_mathematics.yaml | 5 ++ .../global_mmlu_full_tr_college_medicine.yaml | 5 ++ .../global_mmlu_full_tr_college_physics.yaml | 5 ++ ...global_mmlu_full_tr_computer_security.yaml | 5 ++ ...lobal_mmlu_full_tr_conceptual_physics.yaml | 5 ++ 
.../tr/global_mmlu_full_tr_econometrics.yaml | 5 ++ ...l_mmlu_full_tr_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_tr_elementary_mathematics.yaml | 5 ++ .../tr/global_mmlu_full_tr_formal_logic.yaml | 5 ++ .../tr/global_mmlu_full_tr_global_facts.yaml | 5 ++ ...obal_mmlu_full_tr_high_school_biology.yaml | 5 ++ ...al_mmlu_full_tr_high_school_chemistry.yaml | 5 ++ ..._full_tr_high_school_computer_science.yaml | 5 ++ ..._full_tr_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_tr_high_school_geography.yaml | 5 ++ ...r_high_school_government_and_politics.yaml | 5 ++ ...lu_full_tr_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_tr_high_school_mathematics.yaml | 5 ++ ...lu_full_tr_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_tr_high_school_physics.yaml | 5 ++ ...l_mmlu_full_tr_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_tr_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_tr_high_school_us_history.yaml | 5 ++ ...mlu_full_tr_high_school_world_history.yaml | 5 ++ .../tr/global_mmlu_full_tr_human_aging.yaml | 5 ++ .../global_mmlu_full_tr_human_sexuality.yaml | 5 ++ ...global_mmlu_full_tr_international_law.yaml | 5 ++ .../tr/global_mmlu_full_tr_jurisprudence.yaml | 5 ++ ...global_mmlu_full_tr_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_tr_machine_learning.yaml | 5 ++ .../tr/global_mmlu_full_tr_management.yaml | 5 ++ .../tr/global_mmlu_full_tr_marketing.yaml | 5 ++ .../global_mmlu_full_tr_medical_genetics.yaml | 5 ++ .../tr/global_mmlu_full_tr_miscellaneous.yaml | 5 ++ .../global_mmlu_full_tr_moral_disputes.yaml | 5 ++ .../global_mmlu_full_tr_moral_scenarios.yaml | 5 ++ .../tr/global_mmlu_full_tr_nutrition.yaml | 5 ++ .../tr/global_mmlu_full_tr_philosophy.yaml | 5 ++ .../tr/global_mmlu_full_tr_prehistory.yaml | 5 ++ ..._mmlu_full_tr_professional_accounting.yaml | 5 ++ .../global_mmlu_full_tr_professional_law.yaml | 5 ++ ...al_mmlu_full_tr_professional_medicine.yaml | 5 ++ ..._mmlu_full_tr_professional_psychology.yaml | 5 ++ 
.../global_mmlu_full_tr_public_relations.yaml | 5 ++ .../global_mmlu_full_tr_security_studies.yaml | 5 ++ .../tr/global_mmlu_full_tr_sociology.yaml | 5 ++ ...global_mmlu_full_tr_us_foreign_policy.yaml | 5 ++ .../full/tr/global_mmlu_full_tr_virology.yaml | 5 ++ .../global_mmlu_full_tr_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/tr/utils.py | 73 +++++++++++++++++++ .../full/uk/_global_mmlu_full_uk.yaml | 11 +++ .../uk/_global_mmlu_full_uk_humanities.yaml | 8 ++ .../full/uk/_global_mmlu_full_uk_other.yaml | 8 ++ .../_global_mmlu_full_uk_social_sciences.yaml | 8 ++ .../full/uk/_global_mmlu_full_uk_stem.yaml | 8 ++ .../global_mmlu/full/uk/_uk_template_yaml | 16 ++++ .../global_mmlu_full_uk_abstract_algebra.yaml | 5 ++ .../full/uk/global_mmlu_full_uk_anatomy.yaml | 5 ++ .../uk/global_mmlu_full_uk_astronomy.yaml | 5 ++ .../global_mmlu_full_uk_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_uk_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_uk_college_biology.yaml | 5 ++ ...global_mmlu_full_uk_college_chemistry.yaml | 5 ++ ...mmlu_full_uk_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_uk_college_mathematics.yaml | 5 ++ .../global_mmlu_full_uk_college_medicine.yaml | 5 ++ .../global_mmlu_full_uk_college_physics.yaml | 5 ++ ...global_mmlu_full_uk_computer_security.yaml | 5 ++ ...lobal_mmlu_full_uk_conceptual_physics.yaml | 5 ++ .../uk/global_mmlu_full_uk_econometrics.yaml | 5 ++ ...l_mmlu_full_uk_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_uk_elementary_mathematics.yaml | 5 ++ .../uk/global_mmlu_full_uk_formal_logic.yaml | 5 ++ .../uk/global_mmlu_full_uk_global_facts.yaml | 5 ++ ...obal_mmlu_full_uk_high_school_biology.yaml | 5 ++ ...al_mmlu_full_uk_high_school_chemistry.yaml | 5 ++ ..._full_uk_high_school_computer_science.yaml | 5 ++ ..._full_uk_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_uk_high_school_geography.yaml | 5 ++ ...k_high_school_government_and_politics.yaml | 5 ++ ...lu_full_uk_high_school_macroeconomics.yaml | 5 ++ 
..._mmlu_full_uk_high_school_mathematics.yaml | 5 ++ ...lu_full_uk_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_uk_high_school_physics.yaml | 5 ++ ...l_mmlu_full_uk_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_uk_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_uk_high_school_us_history.yaml | 5 ++ ...mlu_full_uk_high_school_world_history.yaml | 5 ++ .../uk/global_mmlu_full_uk_human_aging.yaml | 5 ++ .../global_mmlu_full_uk_human_sexuality.yaml | 5 ++ ...global_mmlu_full_uk_international_law.yaml | 5 ++ .../uk/global_mmlu_full_uk_jurisprudence.yaml | 5 ++ ...global_mmlu_full_uk_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_uk_machine_learning.yaml | 5 ++ .../uk/global_mmlu_full_uk_management.yaml | 5 ++ .../uk/global_mmlu_full_uk_marketing.yaml | 5 ++ .../global_mmlu_full_uk_medical_genetics.yaml | 5 ++ .../uk/global_mmlu_full_uk_miscellaneous.yaml | 5 ++ .../global_mmlu_full_uk_moral_disputes.yaml | 5 ++ .../global_mmlu_full_uk_moral_scenarios.yaml | 5 ++ .../uk/global_mmlu_full_uk_nutrition.yaml | 5 ++ .../uk/global_mmlu_full_uk_philosophy.yaml | 5 ++ .../uk/global_mmlu_full_uk_prehistory.yaml | 5 ++ ..._mmlu_full_uk_professional_accounting.yaml | 5 ++ .../global_mmlu_full_uk_professional_law.yaml | 5 ++ ...al_mmlu_full_uk_professional_medicine.yaml | 5 ++ ..._mmlu_full_uk_professional_psychology.yaml | 5 ++ .../global_mmlu_full_uk_public_relations.yaml | 5 ++ .../global_mmlu_full_uk_security_studies.yaml | 5 ++ .../uk/global_mmlu_full_uk_sociology.yaml | 5 ++ ...global_mmlu_full_uk_us_foreign_policy.yaml | 5 ++ .../full/uk/global_mmlu_full_uk_virology.yaml | 5 ++ .../global_mmlu_full_uk_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/uk/utils.py | 73 +++++++++++++++++++ .../full/vi/_global_mmlu_full_vi.yaml | 11 +++ .../vi/_global_mmlu_full_vi_humanities.yaml | 8 ++ .../full/vi/_global_mmlu_full_vi_other.yaml | 8 ++ .../_global_mmlu_full_vi_social_sciences.yaml | 8 ++ .../full/vi/_global_mmlu_full_vi_stem.yaml | 8 ++ 
.../global_mmlu/full/vi/_vi_template_yaml | 16 ++++ .../global_mmlu_full_vi_abstract_algebra.yaml | 5 ++ .../full/vi/global_mmlu_full_vi_anatomy.yaml | 5 ++ .../vi/global_mmlu_full_vi_astronomy.yaml | 5 ++ .../global_mmlu_full_vi_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_vi_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_vi_college_biology.yaml | 5 ++ ...global_mmlu_full_vi_college_chemistry.yaml | 5 ++ ...mmlu_full_vi_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_vi_college_mathematics.yaml | 5 ++ .../global_mmlu_full_vi_college_medicine.yaml | 5 ++ .../global_mmlu_full_vi_college_physics.yaml | 5 ++ ...global_mmlu_full_vi_computer_security.yaml | 5 ++ ...lobal_mmlu_full_vi_conceptual_physics.yaml | 5 ++ .../vi/global_mmlu_full_vi_econometrics.yaml | 5 ++ ...l_mmlu_full_vi_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_vi_elementary_mathematics.yaml | 5 ++ .../vi/global_mmlu_full_vi_formal_logic.yaml | 5 ++ .../vi/global_mmlu_full_vi_global_facts.yaml | 5 ++ ...obal_mmlu_full_vi_high_school_biology.yaml | 5 ++ ...al_mmlu_full_vi_high_school_chemistry.yaml | 5 ++ ..._full_vi_high_school_computer_science.yaml | 5 ++ ..._full_vi_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_vi_high_school_geography.yaml | 5 ++ ...i_high_school_government_and_politics.yaml | 5 ++ ...lu_full_vi_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_vi_high_school_mathematics.yaml | 5 ++ ...lu_full_vi_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_vi_high_school_physics.yaml | 5 ++ ...l_mmlu_full_vi_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_vi_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_vi_high_school_us_history.yaml | 5 ++ ...mlu_full_vi_high_school_world_history.yaml | 5 ++ .../vi/global_mmlu_full_vi_human_aging.yaml | 5 ++ .../global_mmlu_full_vi_human_sexuality.yaml | 5 ++ ...global_mmlu_full_vi_international_law.yaml | 5 ++ .../vi/global_mmlu_full_vi_jurisprudence.yaml | 5 ++ ...global_mmlu_full_vi_logical_fallacies.yaml | 5 ++ 
.../global_mmlu_full_vi_machine_learning.yaml | 5 ++ .../vi/global_mmlu_full_vi_management.yaml | 5 ++ .../vi/global_mmlu_full_vi_marketing.yaml | 5 ++ .../global_mmlu_full_vi_medical_genetics.yaml | 5 ++ .../vi/global_mmlu_full_vi_miscellaneous.yaml | 5 ++ .../global_mmlu_full_vi_moral_disputes.yaml | 5 ++ .../global_mmlu_full_vi_moral_scenarios.yaml | 5 ++ .../vi/global_mmlu_full_vi_nutrition.yaml | 5 ++ .../vi/global_mmlu_full_vi_philosophy.yaml | 5 ++ .../vi/global_mmlu_full_vi_prehistory.yaml | 5 ++ ..._mmlu_full_vi_professional_accounting.yaml | 5 ++ .../global_mmlu_full_vi_professional_law.yaml | 5 ++ ...al_mmlu_full_vi_professional_medicine.yaml | 5 ++ ..._mmlu_full_vi_professional_psychology.yaml | 5 ++ .../global_mmlu_full_vi_public_relations.yaml | 5 ++ .../global_mmlu_full_vi_security_studies.yaml | 5 ++ .../vi/global_mmlu_full_vi_sociology.yaml | 5 ++ ...global_mmlu_full_vi_us_foreign_policy.yaml | 5 ++ .../full/vi/global_mmlu_full_vi_virology.yaml | 5 ++ .../global_mmlu_full_vi_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/vi/utils.py | 73 +++++++++++++++++++ .../full/yo/_global_mmlu_full_yo.yaml | 11 +++ .../yo/_global_mmlu_full_yo_humanities.yaml | 8 ++ .../full/yo/_global_mmlu_full_yo_other.yaml | 8 ++ .../_global_mmlu_full_yo_social_sciences.yaml | 8 ++ .../full/yo/_global_mmlu_full_yo_stem.yaml | 8 ++ .../global_mmlu/full/yo/_yo_template_yaml | 16 ++++ .../global_mmlu_full_yo_abstract_algebra.yaml | 5 ++ .../full/yo/global_mmlu_full_yo_anatomy.yaml | 5 ++ .../yo/global_mmlu_full_yo_astronomy.yaml | 5 ++ .../global_mmlu_full_yo_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_yo_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_yo_college_biology.yaml | 5 ++ ...global_mmlu_full_yo_college_chemistry.yaml | 5 ++ ...mmlu_full_yo_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_yo_college_mathematics.yaml | 5 ++ .../global_mmlu_full_yo_college_medicine.yaml | 5 ++ .../global_mmlu_full_yo_college_physics.yaml | 5 ++ 
...global_mmlu_full_yo_computer_security.yaml | 5 ++ ...lobal_mmlu_full_yo_conceptual_physics.yaml | 5 ++ .../yo/global_mmlu_full_yo_econometrics.yaml | 5 ++ ...l_mmlu_full_yo_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_yo_elementary_mathematics.yaml | 5 ++ .../yo/global_mmlu_full_yo_formal_logic.yaml | 5 ++ .../yo/global_mmlu_full_yo_global_facts.yaml | 5 ++ ...obal_mmlu_full_yo_high_school_biology.yaml | 5 ++ ...al_mmlu_full_yo_high_school_chemistry.yaml | 5 ++ ..._full_yo_high_school_computer_science.yaml | 5 ++ ..._full_yo_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_yo_high_school_geography.yaml | 5 ++ ...o_high_school_government_and_politics.yaml | 5 ++ ...lu_full_yo_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_yo_high_school_mathematics.yaml | 5 ++ ...lu_full_yo_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_yo_high_school_physics.yaml | 5 ++ ...l_mmlu_full_yo_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_yo_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_yo_high_school_us_history.yaml | 5 ++ ...mlu_full_yo_high_school_world_history.yaml | 5 ++ .../yo/global_mmlu_full_yo_human_aging.yaml | 5 ++ .../global_mmlu_full_yo_human_sexuality.yaml | 5 ++ ...global_mmlu_full_yo_international_law.yaml | 5 ++ .../yo/global_mmlu_full_yo_jurisprudence.yaml | 5 ++ ...global_mmlu_full_yo_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_yo_machine_learning.yaml | 5 ++ .../yo/global_mmlu_full_yo_management.yaml | 5 ++ .../yo/global_mmlu_full_yo_marketing.yaml | 5 ++ .../global_mmlu_full_yo_medical_genetics.yaml | 5 ++ .../yo/global_mmlu_full_yo_miscellaneous.yaml | 5 ++ .../global_mmlu_full_yo_moral_disputes.yaml | 5 ++ .../global_mmlu_full_yo_moral_scenarios.yaml | 5 ++ .../yo/global_mmlu_full_yo_nutrition.yaml | 5 ++ .../yo/global_mmlu_full_yo_philosophy.yaml | 5 ++ .../yo/global_mmlu_full_yo_prehistory.yaml | 5 ++ ..._mmlu_full_yo_professional_accounting.yaml | 5 ++ .../global_mmlu_full_yo_professional_law.yaml | 5 ++ 
...al_mmlu_full_yo_professional_medicine.yaml | 5 ++ ..._mmlu_full_yo_professional_psychology.yaml | 5 ++ .../global_mmlu_full_yo_public_relations.yaml | 5 ++ .../global_mmlu_full_yo_security_studies.yaml | 5 ++ .../yo/global_mmlu_full_yo_sociology.yaml | 5 ++ ...global_mmlu_full_yo_us_foreign_policy.yaml | 5 ++ .../full/yo/global_mmlu_full_yo_virology.yaml | 5 ++ .../global_mmlu_full_yo_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/yo/utils.py | 73 +++++++++++++++++++ .../full/zh/_global_mmlu_full_zh.yaml | 11 +++ .../zh/_global_mmlu_full_zh_humanities.yaml | 8 ++ .../full/zh/_global_mmlu_full_zh_other.yaml | 8 ++ .../_global_mmlu_full_zh_social_sciences.yaml | 8 ++ .../full/zh/_global_mmlu_full_zh_stem.yaml | 8 ++ .../global_mmlu/full/zh/_zh_template_yaml | 16 ++++ .../global_mmlu_full_zh_abstract_algebra.yaml | 5 ++ .../full/zh/global_mmlu_full_zh_anatomy.yaml | 5 ++ .../zh/global_mmlu_full_zh_astronomy.yaml | 5 ++ .../global_mmlu_full_zh_business_ethics.yaml | 5 ++ ...lobal_mmlu_full_zh_clinical_knowledge.yaml | 5 ++ .../global_mmlu_full_zh_college_biology.yaml | 5 ++ ...global_mmlu_full_zh_college_chemistry.yaml | 5 ++ ...mmlu_full_zh_college_computer_science.yaml | 5 ++ ...obal_mmlu_full_zh_college_mathematics.yaml | 5 ++ .../global_mmlu_full_zh_college_medicine.yaml | 5 ++ .../global_mmlu_full_zh_college_physics.yaml | 5 ++ ...global_mmlu_full_zh_computer_security.yaml | 5 ++ ...lobal_mmlu_full_zh_conceptual_physics.yaml | 5 ++ .../zh/global_mmlu_full_zh_econometrics.yaml | 5 ++ ...l_mmlu_full_zh_electrical_engineering.yaml | 5 ++ ...l_mmlu_full_zh_elementary_mathematics.yaml | 5 ++ .../zh/global_mmlu_full_zh_formal_logic.yaml | 5 ++ .../zh/global_mmlu_full_zh_global_facts.yaml | 5 ++ ...obal_mmlu_full_zh_high_school_biology.yaml | 5 ++ ...al_mmlu_full_zh_high_school_chemistry.yaml | 5 ++ ..._full_zh_high_school_computer_science.yaml | 5 ++ ..._full_zh_high_school_european_history.yaml | 5 ++ ...al_mmlu_full_zh_high_school_geography.yaml | 5 ++ 
...h_high_school_government_and_politics.yaml | 5 ++ ...lu_full_zh_high_school_macroeconomics.yaml | 5 ++ ..._mmlu_full_zh_high_school_mathematics.yaml | 5 ++ ...lu_full_zh_high_school_microeconomics.yaml | 5 ++ ...obal_mmlu_full_zh_high_school_physics.yaml | 5 ++ ...l_mmlu_full_zh_high_school_psychology.yaml | 5 ++ ...l_mmlu_full_zh_high_school_statistics.yaml | 5 ++ ...l_mmlu_full_zh_high_school_us_history.yaml | 5 ++ ...mlu_full_zh_high_school_world_history.yaml | 5 ++ .../zh/global_mmlu_full_zh_human_aging.yaml | 5 ++ .../global_mmlu_full_zh_human_sexuality.yaml | 5 ++ ...global_mmlu_full_zh_international_law.yaml | 5 ++ .../zh/global_mmlu_full_zh_jurisprudence.yaml | 5 ++ ...global_mmlu_full_zh_logical_fallacies.yaml | 5 ++ .../global_mmlu_full_zh_machine_learning.yaml | 5 ++ .../zh/global_mmlu_full_zh_management.yaml | 5 ++ .../zh/global_mmlu_full_zh_marketing.yaml | 5 ++ .../global_mmlu_full_zh_medical_genetics.yaml | 5 ++ .../zh/global_mmlu_full_zh_miscellaneous.yaml | 5 ++ .../global_mmlu_full_zh_moral_disputes.yaml | 5 ++ .../global_mmlu_full_zh_moral_scenarios.yaml | 5 ++ .../zh/global_mmlu_full_zh_nutrition.yaml | 5 ++ .../zh/global_mmlu_full_zh_philosophy.yaml | 5 ++ .../zh/global_mmlu_full_zh_prehistory.yaml | 5 ++ ..._mmlu_full_zh_professional_accounting.yaml | 5 ++ .../global_mmlu_full_zh_professional_law.yaml | 5 ++ ...al_mmlu_full_zh_professional_medicine.yaml | 5 ++ ..._mmlu_full_zh_professional_psychology.yaml | 5 ++ .../global_mmlu_full_zh_public_relations.yaml | 5 ++ .../global_mmlu_full_zh_security_studies.yaml | 5 ++ .../zh/global_mmlu_full_zh_sociology.yaml | 5 ++ ...global_mmlu_full_zh_us_foreign_policy.yaml | 5 ++ .../full/zh/global_mmlu_full_zh_virology.yaml | 5 ++ .../global_mmlu_full_zh_world_religions.yaml | 5 ++ lm_eval/tasks/global_mmlu/full/zh/utils.py | 73 +++++++++++++++++++ 2706 files changed, 17532 insertions(+), 1 deletion(-) rename lm_eval/tasks/global_mmlu/{ => default}/_default_yaml (100%) rename lm_eval/tasks/global_mmlu/{ 
=> default}/_generate_configs.py (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ar.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_bn.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_de.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_en.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_es.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_fr.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_hi.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_id.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_it.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ja.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_ko.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_pt.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_sw.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_yo.yaml (100%) rename lm_eval/tasks/global_mmlu/{ => default}/global_mmlu_zh.yaml (100%) create mode 100644 lm_eval/tasks/global_mmlu/full/am/_am_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/am/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_ar_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ar/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/bn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/cs/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/de/_de_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/de/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/el/_el_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/el/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/en/_en_template_yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/en/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/es/_es_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/es/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fa/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fil/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/fr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ha/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/_he_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/he/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/hi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/_id_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/id/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ig/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/_it_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/it/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ja/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ko/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ky/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/lt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/mg/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ms/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ne/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/nl/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ny/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pl/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/pt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ro/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/ru/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/_si_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/si/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/_so_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/so/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sv/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/sw/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/_te_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/te/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/tr/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/uk/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/vi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/yo/utils.py create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml create mode 100644 
lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml create mode 
100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml create mode 100644 lm_eval/tasks/global_mmlu/full/zh/utils.py diff --git a/lm_eval/tasks/global_mmlu/README.md b/lm_eval/tasks/global_mmlu/README.md index 838a7c9d..d1514102 100644 --- a/lm_eval/tasks/global_mmlu/README.md +++ 
b/lm_eval/tasks/global_mmlu/README.md @@ -6,9 +6,26 @@ Title: `Global MMLU: Understanding and Addressing Cultural and Linguistic Biases Abstract: [https://arxiv.org/abs/2412.03304](https://arxiv.org/abs/2412.03304) +Global-MMLU 🌍 is a multilingual evaluation set spanning 42 languages, including English. This dataset combines machine translations for MMLU questions along with professional translations and crowd-sourced post-edits. It also includes cultural sensitivity annotations for a subset of the questions (2850 questions per language) and classifies them as Culturally Sensitive (CS) 🗽 or Culturally Agnostic (CA) ⚖️. These annotations were collected as part of an open science initiative led by Cohere For AI in collaboration with many external collaborators from both industry and academia. + Global-MMLU-Lite is a balanced collection of culturally sensitive and culturally agnostic MMLU tasks. It is designed for efficient evaluation of multilingual models in 15 languages (including English). Only languages with human translations and post-edits in the original [Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) 🌍 dataset have been included in the lite version. -Homepage: [https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) +Homepage: \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU](https://huggingface.co/datasets/CohereForAI/Global-MMLU) \ +[https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite](https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite) + + +#### Groups + +* `global_mmlu_{lang}`: This group uses the `Global-MMLU-Lite` benchmark, which supports 15 languages (including English). +* `global_mmlu_full_{lang}`: This group uses the `Global-MMLU` benchmark, which supports 42 languages. 
+ +#### Subgroups (supported only for the `full` version) + +* `global_mmlu_full_{lang}_stem` +* `global_mmlu_full_{lang}_humanities` +* `global_mmlu_full_{lang}_social_sciences` +* `global_mmlu_full_{lang}_other` ### Citation diff --git a/lm_eval/tasks/global_mmlu/_default_yaml b/lm_eval/tasks/global_mmlu/default/_default_yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/_default_yaml rename to lm_eval/tasks/global_mmlu/default/_default_yaml diff --git a/lm_eval/tasks/global_mmlu/_generate_configs.py b/lm_eval/tasks/global_mmlu/default/_generate_configs.py similarity index 100% rename from lm_eval/tasks/global_mmlu/_generate_configs.py rename to lm_eval/tasks/global_mmlu/default/_generate_configs.py diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ar.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_bn.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_de.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_en.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_es.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml diff --git 
a/lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_fr.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_hi.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_id.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_it.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ja.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_ko.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_pt.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_sw.yaml 
rename to lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_yo.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml diff --git a/lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml similarity index 100% rename from lm_eval/tasks/global_mmlu/global_mmlu_zh.yaml rename to lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml diff --git a/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml new file mode 100644 index 00000000..f52152bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_am_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: am +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml new file mode 100644 index 00000000..48fc270a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_am +task: + - global_mmlu_full_am_stem + - global_mmlu_full_am_other + - global_mmlu_full_am_social_sciences + - global_mmlu_full_am_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml new file mode 100644 index 00000000..e250d14c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_humanities +task: + - global_mmlu_full_am_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml new file mode 100644 index 00000000..4b5151ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_other +task: + - global_mmlu_full_am_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml new file mode 100644 index 00000000..f0fbcc1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_social_sciences.yaml @@ -0,0 +1,8 @@ +group: 
global_mmlu_full_am_social_sciences +task: + - global_mmlu_full_am_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml new file mode 100644 index 00000000..b67dfdb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_am_stem +task: + - global_mmlu_full_am_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml new file mode 100644 index 00000000..06a70dd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml new file mode 100644 index 00000000..7914c3b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml new file mode 100644 index 00000000..4e7e2a04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_am_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml new file mode 100644 index 00000000..a98a9597 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml new file mode 100644 index 00000000..4c25627f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml new file mode 100644 index 00000000..a8b6661b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml new file mode 100644 index 00000000..b0d2d2a8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml new file mode 100644 index 00000000..b5c52a82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml new file mode 100644 index 00000000..0b73422e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml new file mode 100644 index 00000000..bd36f40f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml new file mode 100644 index 00000000..009fdc1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml new file mode 100644 index 00000000..3df6247b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml new file mode 100644 index 00000000..4115ea02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml new file mode 100644 index 00000000..87dd12ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml new file mode 100644 index 00000000..d8f72619 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml new file mode 100644 index 00000000..455563f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml new file mode 100644 index 00000000..5c5babd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml new file mode 100644 index 00000000..b59d47e4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml new file mode 100644 index 00000000..680d4eca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml new file mode 100644 index 00000000..96af7940 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml new file mode 100644 index 00000000..6cd19227 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml new file mode 100644 index 00000000..e0249142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml new file mode 100644 index 00000000..b4925a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml new file mode 100644 index 00000000..d63f1d35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..3c8a0ea6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml new file mode 100644 index 00000000..76a8c3d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml new file mode 100644 index 00000000..1acbf4e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml new file mode 100644 index 00000000..dcfd9bb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml new file mode 100644 index 00000000..2dd64dc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml new file mode 100644 index 00000000..a523f443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml new file mode 100644 index 00000000..ce233f44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml new file mode 100644 index 00000000..20aeca5e 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml new file mode 100644 index 00000000..18e95e40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml new file mode 100644 index 00000000..140f2329 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml new file mode 100644 index 00000000..10a2d638 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml new file mode 100644 index 00000000..cd982742 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml new file mode 100644 index 00000000..2faf735c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml new file mode 100644 index 00000000..7f5c8e9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_am_stem_tasks +task: global_mmlu_full_am_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml new file mode 100644 index 00000000..08d080a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_management diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml new file mode 100644 index 00000000..52b4f7c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_marketing diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml new file mode 100644 index 00000000..32bd2432 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml new file mode 100644 index 00000000..ed5d610d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml new file mode 100644 index 00000000..bddaebc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml new file mode 100644 index 00000000..fda69f31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml new file mode 100644 index 00000000..bb0cb08b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml new file mode 100644 index 00000000..484c015e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml new file mode 100644 index 00000000..6e104f48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml new file mode 100644 index 00000000..50c9fe50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml new file mode 100644 index 00000000..df2cf26c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_am_humanities_tasks +task: global_mmlu_full_am_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml new file mode 100644 index 00000000..c2860528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_am_other_tasks +task: global_mmlu_full_am_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml new file mode 100644 index 00000000..8562a28d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml new file mode 100644 index 00000000..5cb3186c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml new file mode 100644 index 00000000..6aa8575e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_am_social_sciences_tasks +task: global_mmlu_full_am_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml new file mode 100644 index 00000000..60005bab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/am/global_mmlu_full_am_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _am_template_yaml +process_docs: !function utils.process_sociology +tag: 
"""Per-subject document filters for the global_mmlu task configs.

The task YAML files next to this module set ``process_docs`` to a
``utils.process_<subject>`` callable; those callables are generated at
import time at the bottom of this module.
"""

from functools import partial


# Subject identifiers, compared against each row's "subject" field.
SUBJECTS = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
    "high_school_macroeconomics",
    "high_school_mathematics",
    "high_school_microeconomics",
    "high_school_physics",
    "high_school_psychology",
    "high_school_statistics",
    "high_school_us_history",
    "high_school_world_history",
    "human_aging",
    "human_sexuality",
    "international_law",
    "jurisprudence",
    "logical_fallacies",
    "machine_learning",
    "management",
    "marketing",
    "medical_genetics",
    "miscellaneous",
    "moral_disputes",
    "moral_scenarios",
    "nutrition",
    "philosophy",
    "prehistory",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_studies",
    "sociology",
    "us_foreign_policy",
    "virology",
    "world_religions",
]


def process_docs(dataset, subject):
    """Return the rows of *dataset* whose ``"subject"`` equals *subject*.

    The work is delegated to ``dataset.filter``, so the return value is
    whatever that method produces for the given predicate.
    """

    def _is_requested_subject(row):
        return row["subject"] == subject

    return dataset.filter(_is_requested_subject)


# One pre-bound filter per subject, keyed "process_<subject>". ``partial``
# captures the subject at construction time, so each entry keeps its own value.
process_functions = dict(
    (f"process_{name}", partial(process_docs, subject=name)) for name in SUBJECTS
)

# Publish every filter as a module attribute so it can be resolved by name
# (for example ``utils.process_anatomy``).
globals().update(process_functions)
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml new file mode 100644 index 00000000..61f60b9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ar +task: + - global_mmlu_full_ar_stem + - global_mmlu_full_ar_other + - global_mmlu_full_ar_social_sciences + - global_mmlu_full_ar_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml new file mode 100644 index 00000000..cfa6d80a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_humanities +task: + - global_mmlu_full_ar_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml new file mode 100644 index 00000000..26603f33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_other +task: + - global_mmlu_full_ar_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml new file mode 100644 index 00000000..aca95bc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_ar_social_sciences +task: + - global_mmlu_full_ar_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml new file mode 100644 index 00000000..b91e6c9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ar_stem +task: + - global_mmlu_full_ar_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml new file mode 100644 index 00000000..1f044b04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml new file mode 100644 index 00000000..cd5d0963 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml new file mode 100644 index 00000000..d21c00b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml new file mode 100644 index 00000000..a73f5f2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml new file mode 100644 index 00000000..a9c3d078 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml new file mode 100644 index 00000000..6fba6a1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml new file mode 100644 index 00000000..386ba52d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml new file mode 100644 index 00000000..9b846715 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml new file mode 100644 index 00000000..c8d8d090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml new file mode 100644 index 00000000..b988cfee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml new file mode 100644 index 00000000..008a39dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml new file mode 100644 index 00000000..34a93535 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml new file mode 100644 index 00000000..ea20efa5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml new file mode 100644 index 00000000..3a757901 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml new file mode 100644 index 00000000..31a4e22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml new file mode 100644 index 00000000..25f4adb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml new file mode 100644 index 00000000..b2792d56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml new file mode 100644 index 00000000..af1bf60b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml new file mode 100644 index 00000000..8f7eaff7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml new file mode 100644 index 00000000..8f56395b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml new file mode 100644 index 00000000..6e388aed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml new file mode 100644 index 00000000..741584c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml new file mode 100644 index 00000000..3c376967 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml new file mode 100644 index 00000000..c71ada9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..0b5f3267 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml new file mode 100644 index 00000000..cb259ac2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml new file mode 100644 index 00000000..c4ab308b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml new file mode 100644 index 00000000..68180e5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml new file mode 100644 index 00000000..e727ad09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml new file mode 100644 index 00000000..8ff9dd0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml new file mode 100644 index 00000000..668991cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml new file mode 100644 index 00000000..1df9a553 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml new file mode 100644 index 00000000..515a40f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml new file mode 100644 index 00000000..24caceac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml new file mode 100644 index 00000000..a5aee4b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml new file mode 100644 index 00000000..37781208 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml new file mode 100644 index 00000000..4365730e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml new file mode 100644 index 00000000..e1fc86e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_ar_stem_tasks +task: global_mmlu_full_ar_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml new file mode 100644 index 00000000..4dc7c8c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_management diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml new file mode 100644 index 00000000..371fb521 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml new file mode 100644 index 00000000..c080b645 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml new file mode 100644 index 00000000..7d593ecb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml new file mode 100644 index 00000000..4021a93e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml new file mode 100644 index 00000000..f09edd00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml new file mode 100644 index 00000000..8d8577cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml new file mode 100644 index 00000000..733b77ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml new file mode 100644 index 00000000..4d1bf141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml new file mode 100644 index 00000000..45b07299 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml new file mode 100644 index 00000000..6e33b583 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml new file mode 100644 index 00000000..4cd0a17a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml new file mode 100644 index 00000000..f035162d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml new file mode 100644 index 00000000..3d4dd34f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml new file mode 100644 index 00000000..f2245b52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml new file mode 100644 index 00000000..dd920305 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml new file mode 100644 index 00000000..0f38b855 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ar_social_sciences_tasks +task: global_mmlu_full_ar_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml new file mode 100644 index 00000000..f3be1f8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ar_other_tasks +task: global_mmlu_full_ar_virology diff --git a/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml new file mode 100644 index 00000000..7c7f01a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/global_mmlu_full_ar_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_ar_humanities_tasks +task: global_mmlu_full_ar_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ar/utils.py b/lm_eval/tasks/global_mmlu/full/ar/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ar/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml new file mode 100644 index 00000000..f388063d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml new file mode 100644 index 00000000..f1c91f09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_bn +task: + - global_mmlu_full_bn_stem + - global_mmlu_full_bn_other + - global_mmlu_full_bn_social_sciences + - global_mmlu_full_bn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml new file mode 100644 index 00000000..acd1ab01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_humanities +task: + - global_mmlu_full_bn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml new file mode 100644 index 00000000..d2160298 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_other +task: + - global_mmlu_full_bn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml new file mode 100644 index 00000000..c359b359 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_bn_social_sciences +task: + - global_mmlu_full_bn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml new file mode 100644 index 00000000..2c78c4ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_bn_stem +task: + - global_mmlu_full_bn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml new file mode 100644 index 00000000..5bb7bb61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml new file mode 100644 index 00000000..d49070f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml new file mode 100644 index 00000000..2e6dbc97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml new file mode 100644 index 00000000..8c45a0e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml new file mode 100644 index 00000000..97e17570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml new file mode 100644 index 00000000..9bf0b34c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml new file mode 100644 index 00000000..cb5a2600 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml new file mode 100644 index 00000000..ecd60e54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml new file mode 100644 index 00000000..5fb69d57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml new file mode 100644 index 00000000..442045f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml new file mode 100644 index 00000000..6849ffbb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml new file mode 100644 index 00000000..184097f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml new file mode 100644 index 00000000..4dc8a2c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml new file mode 100644 index 00000000..941f6355 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml new file mode 100644 index 00000000..5918b08a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml new file mode 100644 index 00000000..8f7d1f10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml new file mode 100644 index 00000000..b54c80db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml new file mode 100644 index 00000000..371d61cd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml new file mode 100644 index 00000000..4f2c8731 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml new file mode 100644 index 00000000..0022c824 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml new file mode 100644 index 00000000..62ed6c6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml new file mode 100644 index 00000000..b9118a11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml new file mode 100644 index 00000000..a7fa3c1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml new file mode 100644 index 00000000..067ec0ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..12c775ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml new file mode 100644 index 00000000..82809b15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml new file mode 100644 index 00000000..a14eb703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml new file mode 100644 index 00000000..a84f85fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml new file mode 100644 index 00000000..5b10c59d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml new file mode 100644 index 00000000..f4231ea2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml new file mode 100644 index 00000000..28dbddf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml new file mode 100644 index 00000000..fc8aec7d 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml new file mode 100644 index 00000000..16a3c204 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml new file mode 100644 index 00000000..a25244a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml new file mode 100644 index 00000000..b47f516d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml new file mode 100644 index 00000000..08ab51e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml new file mode 100644 index 00000000..0885a1a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml new file mode 100644 index 00000000..f0eb0997 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_bn_stem_tasks +task: global_mmlu_full_bn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml new file mode 100644 index 00000000..d006b411 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_management diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml new file mode 100644 index 00000000..520f9469 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml new file mode 100644 index 00000000..88caa977 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml new file mode 100644 index 00000000..9ce31f7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml new file mode 100644 index 00000000..44403216 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml new file mode 100644 index 00000000..e5f59e15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml new file mode 100644 index 00000000..422bba55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml new file mode 100644 index 00000000..62af532b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml new file mode 100644 index 00000000..dc49d36c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml new file mode 100644 index 00000000..bf72a6a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml new file mode 100644 index 00000000..f49fb142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml new file mode 100644 index 00000000..3c53d77a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml new file mode 100644 index 00000000..a50c5cbf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml new file mode 100644 index 00000000..00e2742a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml new file mode 100644 index 00000000..5a0e7612 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml new file mode 100644 index 00000000..e8820319 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_sociology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml new file mode 100644 index 00000000..42be796a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_bn_social_sciences_tasks +task: global_mmlu_full_bn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml new file mode 100644 index 00000000..3959f006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_bn_other_tasks +task: global_mmlu_full_bn_virology diff --git a/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml new file mode 100644 index 00000000..15ee9efc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/global_mmlu_full_bn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_bn_humanities_tasks +task: global_mmlu_full_bn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/bn/utils.py b/lm_eval/tasks/global_mmlu/full/bn/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/bn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml new file mode 100644 index 00000000..ce2189a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_cs_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: cs +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml new file mode 100644 index 00000000..977b0051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_cs +task: + - global_mmlu_full_cs_stem + - global_mmlu_full_cs_other + - global_mmlu_full_cs_social_sciences + - global_mmlu_full_cs_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml new file mode 100644 index 00000000..b4b4aff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_humanities +task: + - global_mmlu_full_cs_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml new file mode 100644 index 00000000..302912e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_other +task: + - global_mmlu_full_cs_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml new file mode 100644 index 00000000..d3fed76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_cs_social_sciences +task: + - global_mmlu_full_cs_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml new file mode 100644 index 00000000..898bb092 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_cs_stem +task: + - global_mmlu_full_cs_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml new file mode 100644 index 00000000..40431ec9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml new file mode 100644 index 00000000..97d7354d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml new file mode 100644 index 00000000..5b5a5f99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml new file mode 100644 index 00000000..6db79c52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml new file mode 100644 index 00000000..3a17c605 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml new file mode 100644 index 00000000..9c6597b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml new file mode 100644 index 00000000..713af5c3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml new file mode 100644 index 00000000..fd619d13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml new file mode 100644 index 00000000..e09563f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml new file mode 100644 index 00000000..f7b868c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml new file mode 100644 index 00000000..e98df339 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml new file mode 100644 index 00000000..7256ad67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml new file mode 100644 index 00000000..9bd64498 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml new file mode 100644 index 00000000..c954d320 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml new file mode 100644 index 00000000..2f80e8ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml new file mode 100644 index 00000000..bfbc2c9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml new file mode 100644 index 00000000..0c2ec8bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml new file mode 100644 index 00000000..6302b417 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml new file mode 100644 index 00000000..b69e9ac3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml new file mode 100644 index 00000000..67f53cf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml new file mode 100644 index 00000000..0be19221 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml new file mode 100644 index 00000000..7fa264c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml new file mode 100644 index 00000000..b9f903c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml new file mode 100644 index 00000000..5bde4d69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..bb5068ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml new file mode 100644 index 00000000..87cb3e57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml new file mode 100644 index 00000000..33c2e18c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml new file mode 100644 index 00000000..1ed095bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml new file mode 100644 index 00000000..59b62305 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml new file mode 100644 index 00000000..1a18ee25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml new file mode 100644 index 00000000..d8d0a271 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml new file mode 100644 index 00000000..07012306 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml new file mode 100644 index 00000000..e3f5c7c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml new file mode 100644 index 00000000..61d405c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml new file mode 100644 index 00000000..509ebee4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml new file mode 100644 index 00000000..c0e27957 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml new file mode 100644 index 00000000..85010f3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml new file mode 100644 index 00000000..32aaa1a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_cs_stem_tasks +task: global_mmlu_full_cs_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml new file mode 100644 index 00000000..4e1a3a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_management diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml new file mode 100644 index 00000000..239e3c0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_marketing diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml new file mode 100644 index 00000000..1c76fee7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml new file mode 100644 index 00000000..4be6207a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml new file mode 100644 index 00000000..b263f67e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml new file mode 100644 index 00000000..6532a43e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml new file mode 100644 index 00000000..3f04fbcd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml new file mode 100644 index 00000000..2f5093f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml new file mode 100644 index 00000000..a8f5f5a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml new file mode 100644 index 00000000..bccb71b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml new file mode 100644 index 00000000..ff50f50c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml new file mode 100644 index 00000000..9b829379 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml new file mode 100644 index 00000000..e41edb29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml new file mode 100644 index 00000000..e8fb512d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml new file mode 100644 index 00000000..64ec0b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml new file mode 100644 index 00000000..18214f7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_sociology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml new file mode 100644 index 00000000..ac42b097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_cs_social_sciences_tasks +task: global_mmlu_full_cs_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml new file mode 100644 index 00000000..a51b8aef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_cs_other_tasks +task: global_mmlu_full_cs_virology diff --git a/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml new file mode 100644 index 00000000..cf9af3e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/global_mmlu_full_cs_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _cs_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_cs_humanities_tasks +task: global_mmlu_full_cs_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/cs/utils.py b/lm_eval/tasks/global_mmlu/full/cs/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/cs/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml new file mode 100644 index 00000000..036b8619 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml new file mode 100644 index 00000000..c09da268 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_de +task: + - global_mmlu_full_de_stem + - global_mmlu_full_de_other + - global_mmlu_full_de_social_sciences + - global_mmlu_full_de_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml new file mode 100644 index 00000000..df571c67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_humanities +task: + - global_mmlu_full_de_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml new file mode 100644 index 00000000..bfff864e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_other +task: + - global_mmlu_full_de_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml new file mode 100644 index 00000000..8cf304a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_de_social_sciences +task: + - global_mmlu_full_de_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml new file mode 100644 index 00000000..75d1aa5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_de_stem +task: + - global_mmlu_full_de_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml new file mode 100644 index 00000000..07cd2356 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml new file mode 100644 index 00000000..9deb16a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml new file mode 100644 index 00000000..6a743d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml new file mode 100644 index 00000000..37bf9d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml new file mode 100644 index 00000000..c5ad878a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml new file mode 100644 index 00000000..200f9239 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml new file mode 100644 index 00000000..2bbc4d46 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml new file mode 100644 index 00000000..ac903e3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml new file mode 100644 index 00000000..616010ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml new file mode 100644 index 00000000..b9648ce8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml new file mode 100644 index 00000000..d3bc6892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml new file mode 100644 index 00000000..fee01f9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml new file mode 100644 index 00000000..201c17d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml new file mode 100644 index 00000000..1d902c3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml new file mode 100644 index 00000000..8dcb6c48 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml new file mode 100644 index 00000000..a1ca41ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml new file mode 100644 index 00000000..6e16729e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml new file mode 100644 index 00000000..a7b09289 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml new file mode 100644 index 00000000..0ad59551 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml new file mode 100644 index 00000000..6c0fbd55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml new file mode 100644 index 00000000..0aea5ada --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml new file mode 100644 index 00000000..97293b49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml new file mode 100644 index 00000000..d26a65d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml new file mode 100644 index 00000000..b6ec78e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..53489d85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml new file mode 100644 index 00000000..44a5666f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml new file mode 100644 index 00000000..3b911297 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml new file mode 100644 index 00000000..8d17d047 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml new file mode 100644 index 00000000..ae768002 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml new file mode 100644 index 00000000..4c272287 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml new file mode 100644 index 00000000..9c1eff81 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml new file mode 100644 index 00000000..11f804a6 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml new file mode 100644 index 00000000..7d5b4d77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml new file mode 100644 index 00000000..b3f09c7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml new file mode 100644 index 00000000..34bb5918 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml new file mode 100644 index 00000000..585e99b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml new file mode 100644 index 00000000..dd09d6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml new file mode 100644 index 00000000..dfe82a9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_de_stem_tasks +task: global_mmlu_full_de_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml new file mode 100644 index 00000000..7304da38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_management diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml new file mode 100644 index 00000000..2143e4f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_marketing diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml new file mode 100644 index 00000000..01549868 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml new file mode 100644 index 00000000..0c8bd533 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml new file mode 100644 index 00000000..f03361ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml new file mode 100644 index 00000000..a36519a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml new file mode 100644 index 00000000..799065cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml new file mode 100644 index 00000000..a5f0372b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml new file mode 100644 index 00000000..2145e87d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml new file mode 100644 index 00000000..7ad55e97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml new file mode 100644 index 00000000..6f4e338f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml new file mode 100644 index 00000000..7a1214a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml new file mode 100644 index 00000000..a2d49ec8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml new file mode 100644 index 00000000..4b7d23a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml new file mode 100644 index 00000000..a1a3b22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml new file mode 100644 index 00000000..fefef9d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_sociology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml new file mode 100644 index 00000000..35394ab5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_de_social_sciences_tasks +task: global_mmlu_full_de_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml new file mode 100644 index 00000000..f0f2f595 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_de_other_tasks +task: global_mmlu_full_de_virology diff --git a/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml new file mode 100644 index 00000000..1a43e6fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/global_mmlu_full_de_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_de_humanities_tasks +task: global_mmlu_full_de_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/de/utils.py b/lm_eval/tasks/global_mmlu/full/de/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/de/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml new file mode 100644 index 00000000..5fccad5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_el_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: el +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml new file mode 100644 index 00000000..a77feecb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_el +task: + - global_mmlu_full_el_stem + - global_mmlu_full_el_other + - global_mmlu_full_el_social_sciences + - global_mmlu_full_el_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml new file mode 100644 index 00000000..f07f2b52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_humanities +task: + - global_mmlu_full_el_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml new file mode 100644 index 00000000..938292f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_other +task: + - global_mmlu_full_el_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml new file mode 100644 index 00000000..e72e1e9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_el_social_sciences +task: + - global_mmlu_full_el_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml new file mode 100644 index 00000000..2123be08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_el_stem +task: + - global_mmlu_full_el_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml new file mode 100644 index 00000000..bc56c069 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml new file mode 100644 index 00000000..0b2e0e7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml new file mode 100644 index 00000000..7faf7389 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml new file mode 100644 index 00000000..0e8b5bb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml new file mode 100644 index 00000000..51ade421 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml new file mode 100644 index 00000000..cf3aa362 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml new file mode 100644 index 00000000..cd8e1dac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml new file mode 100644 index 00000000..f1ea0859 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml new file mode 100644 index 00000000..0ec055b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml new file mode 100644 index 00000000..b16b545b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml new file mode 100644 index 00000000..a4630f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml new file mode 100644 index 00000000..a40228ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml new file mode 100644 index 00000000..e7baf6e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml new file mode 100644 index 00000000..48e59021 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml new file mode 100644 index 00000000..294c3c5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml new file mode 100644 index 00000000..4373d82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml new file mode 100644 index 00000000..81799a17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml new file mode 100644 index 00000000..6317eeec --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml new file mode 100644 index 00000000..fa5958aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml new file mode 100644 index 00000000..38053add --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml new file mode 100644 index 00000000..4fe73214 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml new file mode 100644 index 00000000..8beb1e3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml new file mode 100644 index 00000000..22c08321 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3b33041 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..63ba6a05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml new file mode 100644 index 00000000..f6ff6e2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml new file mode 100644 index 00000000..5e4deeeb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml new file mode 100644 index 00000000..cb875703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml new file mode 100644 index 00000000..e82d1b53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml new file mode 100644 index 00000000..0003184c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml new file mode 100644 index 00000000..f5e0a367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml new file mode 100644 index 00000000..ac460ea8 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml new file mode 100644 index 00000000..8a40e04f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml new file mode 100644 index 00000000..de5075bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml new file mode 100644 index 00000000..2fb93f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml new file mode 100644 index 00000000..624e040a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml new file mode 100644 index 00000000..7e5bdb4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml new file mode 100644 index 00000000..180f3b25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_el_stem_tasks +task: global_mmlu_full_el_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml new file mode 100644 index 00000000..40487fb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_management diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml new file mode 100644 index 00000000..781d4170 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_marketing diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml new file mode 100644 index 00000000..2ca01146 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml new file mode 100644 index 00000000..66114367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml new file mode 100644 index 00000000..c553ab7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml new file mode 100644 index 00000000..14a79a4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml new file mode 100644 index 00000000..595daa39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml new file mode 100644 index 00000000..25b121b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml new file mode 100644 index 00000000..5938a174 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml new file mode 100644 index 00000000..002b02aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml new file mode 100644 index 00000000..7b457038 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml new file mode 100644 index 00000000..a31d4e3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml new file mode 100644 index 00000000..6e048079 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml new file mode 100644 index 00000000..264799d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml new file mode 100644 index 00000000..19ffae47 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml new file mode 100644 index 00000000..f57d3e0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_sociology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml new file mode 100644 index 00000000..14c76440 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_el_social_sciences_tasks +task: global_mmlu_full_el_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml new file mode 100644 index 00000000..0e444358 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_el_other_tasks +task: global_mmlu_full_el_virology diff --git a/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml new file mode 100644 index 00000000..60f8e52e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/global_mmlu_full_el_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _el_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_el_humanities_tasks +task: global_mmlu_full_el_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/el/utils.py b/lm_eval/tasks/global_mmlu/full/el/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/el/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml new file mode 100644 index 00000000..ae7da46b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml new file mode 100644 index 00000000..648a10dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_en +task: + - global_mmlu_full_en_stem + - global_mmlu_full_en_other + - global_mmlu_full_en_social_sciences + - global_mmlu_full_en_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml new file mode 100644 index 00000000..4455fbcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_humanities +task: + - global_mmlu_full_en_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml new file mode 100644 index 00000000..cca60e52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_other +task: + - global_mmlu_full_en_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml new file mode 100644 index 00000000..becac7a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_en_social_sciences +task: + - global_mmlu_full_en_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml new file mode 100644 index 00000000..71aac061 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_en_stem +task: + - global_mmlu_full_en_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml new file mode 100644 index 00000000..3d7a5ed8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml new file mode 100644 index 00000000..f2267ad8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml new file mode 100644 index 00000000..6999c30f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml new file mode 100644 index 00000000..56a6e490 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml new file mode 100644 index 00000000..60425fad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml new file mode 100644 index 00000000..9b5f2f8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml new file mode 100644 index 00000000..8e2ab91f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml new file mode 100644 index 00000000..9abf38db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml new file mode 100644 index 00000000..5da6199f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml new file mode 100644 index 00000000..c568f36b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml new file mode 100644 index 00000000..ac044019 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml new file mode 100644 index 00000000..be47dbde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml new file mode 100644 index 00000000..86180924 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml new file mode 100644 index 00000000..a75d329f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml new file mode 100644 index 00000000..2568993f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml new file mode 100644 index 00000000..622a99f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml new file mode 100644 index 00000000..109ca44a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml new file mode 100644 index 00000000..39daa506 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml new file mode 100644 index 00000000..063392eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml new file mode 100644 index 00000000..452e9445 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml new file mode 100644 index 00000000..baf43136 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml new file mode 100644 index 00000000..fceda5c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml new file mode 100644 index 00000000..4fbb9ade --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml new file mode 100644 index 00000000..73ca9087 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..1b9ca7a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml new file mode 100644 index 00000000..9be50ad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml new file mode 100644 index 00000000..d93285cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml new file mode 100644 index 00000000..2f74c609 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml new file mode 100644 index 00000000..365762ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml new file mode 100644 index 00000000..d6ca42ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml new file mode 100644 index 00000000..4f20a4dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml new file mode 100644 index 00000000..d0fce403 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml new file mode 100644 index 00000000..35320a85 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml new file mode 100644 index 00000000..86096c5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml new file mode 100644 index 00000000..8a41e9fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml new file mode 100644 index 00000000..aa34c443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml new file mode 100644 index 00000000..50c105b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml new file mode 100644 index 00000000..35f496c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_en_stem_tasks +task: global_mmlu_full_en_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml new file mode 100644 index 00000000..d8499d9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_management diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml new file mode 100644 index 00000000..05f8f0ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_marketing diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml new file mode 100644 index 00000000..8f272510 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml new file mode 100644 index 00000000..a72fad22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml new file mode 100644 index 00000000..2504abeb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml new file mode 100644 index 00000000..4ae4c37a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml new file mode 100644 index 00000000..b5364f69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml new file mode 100644 index 00000000..6e68d7e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml new file mode 100644 index 00000000..72e93368 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml new file mode 100644 index 00000000..cdb66ead --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml new file mode 100644 index 00000000..67120278 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml new file mode 100644 index 00000000..ffbcb29b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml new file mode 100644 index 00000000..1abea59b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml new file mode 100644 index 00000000..9df4f491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml new file mode 100644 index 00000000..addb6934 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml new file mode 100644 index 00000000..a198cb84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_sociology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml new file mode 100644 index 00000000..047b61e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_en_social_sciences_tasks +task: global_mmlu_full_en_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml new file mode 100644 index 00000000..bb74fefd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_en_other_tasks +task: global_mmlu_full_en_virology diff --git a/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml new file mode 100644 index 00000000..2c453bf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/global_mmlu_full_en_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_en_humanities_tasks +task: global_mmlu_full_en_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/en/utils.py b/lm_eval/tasks/global_mmlu/full/en/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/en/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml new file mode 100644 index 00000000..443af17c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml new file mode 100644 index 00000000..832001c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_es +task: + - global_mmlu_full_es_stem + - global_mmlu_full_es_other + - global_mmlu_full_es_social_sciences + - global_mmlu_full_es_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml new file mode 100644 index 00000000..bda6944e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_humanities +task: + - global_mmlu_full_es_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml new file mode 100644 index 00000000..610366ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_other +task: + - global_mmlu_full_es_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml new file mode 100644 index 00000000..00948690 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_es_social_sciences +task: + - global_mmlu_full_es_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml new file mode 100644 index 00000000..483a8fd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_es_stem +task: + - global_mmlu_full_es_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml new file mode 100644 index 00000000..02fb7200 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml new file mode 100644 index 00000000..40f05e7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml new file mode 100644 index 00000000..fb688c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml new file mode 100644 index 00000000..aab858f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml new file mode 100644 index 00000000..a3483f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml new file mode 100644 index 00000000..36658ab6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml new file mode 100644 index 00000000..47a47444 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml new file mode 100644 index 00000000..4154324e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml new file mode 100644 index 00000000..85bc6261 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml new file mode 100644 index 00000000..40e8d129 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml new file mode 100644 index 00000000..7ebc5e95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml new file mode 100644 index 00000000..b586eb2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml new file mode 100644 index 00000000..4186cec6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml new file mode 100644 index 00000000..3d61c8f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml new file mode 100644 index 00000000..1a454d79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml new file mode 100644 index 00000000..772436e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml new file mode 100644 index 00000000..da6223fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml new file mode 100644 index 00000000..ae3b5912 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml new file mode 100644 index 00000000..79a72140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml new file mode 100644 index 00000000..27ba7570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml new file mode 100644 index 00000000..72ad4505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml new file mode 100644 index 00000000..2cec9d5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml new file mode 100644 index 00000000..5ee91f71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml new file mode 100644 index 00000000..b3f10319 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..d555129a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml new file mode 100644 index 00000000..a1216336 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml new file mode 100644 index 00000000..d4c28844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml new file mode 100644 index 00000000..fb83ad1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml new file mode 100644 index 00000000..4bcd53e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml new file mode 100644 index 00000000..900936eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml new file mode 100644 index 00000000..d54acd65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml new file mode 100644 index 00000000..2a654fe8 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml new file mode 100644 index 00000000..47bd8900 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml new file mode 100644 index 00000000..29925c34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml new file mode 100644 index 00000000..abe4ef94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml new file mode 100644 index 00000000..751878fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml new file mode 100644 index 00000000..55233f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml new file mode 100644 index 00000000..9a11e310 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_es_stem_tasks +task: global_mmlu_full_es_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml new file mode 100644 index 00000000..a31b4c26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_management diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml new file mode 100644 index 00000000..22136569 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_marketing diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml new file mode 100644 index 00000000..18fc7a23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml new file mode 100644 index 00000000..5b3955a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml new file mode 100644 index 00000000..57095856 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml new file mode 100644 index 00000000..ed31f8cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml new file mode 100644 index 00000000..07746d09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml new file mode 100644 index 00000000..3853e162 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml new file mode 100644 index 00000000..b75ac9df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml new file mode 100644 index 00000000..da8fd46f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml new file mode 100644 index 00000000..ddd0ab3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml new file mode 100644 index 00000000..6be1ae81 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml new file mode 100644 index 00000000..cadc7f96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml new file mode 100644 index 00000000..72609ea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml new file mode 100644 index 00000000..319123c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml new file mode 100644 index 00000000..dec44c29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_sociology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml new file mode 100644 index 00000000..a18a3942 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_es_social_sciences_tasks +task: global_mmlu_full_es_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml new file mode 100644 index 00000000..b06431e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_es_other_tasks +task: global_mmlu_full_es_virology diff --git a/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml new file mode 100644 index 00000000..4d9d6b79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/global_mmlu_full_es_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_es_humanities_tasks +task: global_mmlu_full_es_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/es/utils.py b/lm_eval/tasks/global_mmlu/full/es/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/es/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml new file mode 100644 index 00000000..952259b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_fa_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fa +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. 
{{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml new file mode 100644 index 00000000..9edb8540 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fa +task: + - global_mmlu_full_fa_stem + - global_mmlu_full_fa_other + - global_mmlu_full_fa_social_sciences + - global_mmlu_full_fa_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml new file mode 100644 index 00000000..f36ecea5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_humanities +task: + - global_mmlu_full_fa_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml new file mode 100644 index 00000000..dd57bb86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_other +task: + - global_mmlu_full_fa_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml new file mode 100644 index 00000000..9e7da860 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_social_sciences.yaml @@ 
-0,0 +1,8 @@ +group: global_mmlu_full_fa_social_sciences +task: + - global_mmlu_full_fa_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml new file mode 100644 index 00000000..5bf2eb01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fa_stem +task: + - global_mmlu_full_fa_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml new file mode 100644 index 00000000..1014795f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml new file mode 100644 index 00000000..317705c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml new file mode 100644 index 00000000..45475964 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml new file mode 100644 index 00000000..3c0dd60b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml new file mode 100644 index 00000000..a7af0e21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml new file mode 100644 index 00000000..31ae6d71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml new file mode 100644 index 00000000..8b099f41 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml new file mode 100644 index 00000000..07491e5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml new file mode 100644 index 00000000..774f6b97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml new file mode 100644 index 00000000..13d6f5a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_college_medicine diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml new file mode 100644 index 00000000..1e415b8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml new file mode 100644 index 00000000..ae47213b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml new file mode 100644 index 00000000..c3f2ba4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml new file mode 100644 index 00000000..6cf79a92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function 
utils.process_econometrics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml new file mode 100644 index 00000000..ab7aa858 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml new file mode 100644 index 00000000..b83f6ddc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml new file mode 100644 index 00000000..cab2effa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml new file mode 100644 index 00000000..93d11b75 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml new file mode 100644 index 00000000..59b6869b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml new file mode 100644 index 00000000..8d15d4b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml new file mode 100644 index 00000000..a02df4f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_computer_science diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml new file mode 100644 index 00000000..e18b2c7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml new file mode 100644 index 00000000..d94c7e89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e1007895 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml new file mode 100644 index 
00000000..a9ad0633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml new file mode 100644 index 00000000..2c733b17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml new file mode 100644 index 00000000..4f88f0aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml new file mode 100644 index 00000000..64fdef98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_physics +tag: 
global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml new file mode 100644 index 00000000..c43a115b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml new file mode 100644 index 00000000..ebb4e82d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml new file mode 100644 index 00000000..a0041e33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml new file mode 100644 index 00000000..66f38f54 
--- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml new file mode 100644 index 00000000..95a2adde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml new file mode 100644 index 00000000..475a71fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml new file mode 100644 index 00000000..c0d6aec2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_international_law diff --git 
a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml new file mode 100644 index 00000000..0d82bd5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml new file mode 100644 index 00000000..a8e89d3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml new file mode 100644 index 00000000..4e4d1a8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fa_stem_tasks +task: global_mmlu_full_fa_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml new file mode 100644 index 00000000..e7e592ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_management +tag: 
global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_management diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml new file mode 100644 index 00000000..c0e7ef1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml new file mode 100644 index 00000000..c31679ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml new file mode 100644 index 00000000..652d5a33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml new file mode 100644 index 00000000..16adcb26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml 
+process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml new file mode 100644 index 00000000..92d018f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml new file mode 100644 index 00000000..ae7e065e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml new file mode 100644 index 00000000..cd8513da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml new file mode 100644 index 00000000..9fd6bb3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by 
_generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml new file mode 100644 index 00000000..99f6c316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml new file mode 100644 index 00000000..9fee460a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml new file mode 100644 index 00000000..13d67d45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml 
b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml new file mode 100644 index 00000000..3e821145 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml new file mode 100644 index 00000000..de6cc311 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml new file mode 100644 index 00000000..64d5fd14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml new file mode 100644 index 00000000..cf3d9564 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_sociology +tag: 
global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml new file mode 100644 index 00000000..38d51936 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fa_social_sciences_tasks +task: global_mmlu_full_fa_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml new file mode 100644 index 00000000..39c5188d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fa_other_tasks +task: global_mmlu_full_fa_virology diff --git a/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml new file mode 100644 index 00000000..44e6fc82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/global_mmlu_full_fa_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fa_template_yaml +process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fa_humanities_tasks +task: global_mmlu_full_fa_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fa/utils.py b/lm_eval/tasks/global_mmlu/full/fa/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fa/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + 
"clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml new file mode 100644 index 00000000..32dc097a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_fil_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fil +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. 
{{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml new file mode 100644 index 00000000..24fcb6d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fil +task: + - global_mmlu_full_fil_stem + - global_mmlu_full_fil_other + - global_mmlu_full_fil_social_sciences + - global_mmlu_full_fil_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml new file mode 100644 index 00000000..061eb818 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_humanities +task: + - global_mmlu_full_fil_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml new file mode 100644 index 00000000..fea793ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_other +task: + - global_mmlu_full_fil_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml new file mode 100644 index 00000000..e9f79330 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_social_sciences +task: + - global_mmlu_full_fil_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml new file mode 100644 index 00000000..2e567c70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fil_stem +task: + - global_mmlu_full_fil_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml new file mode 100644 index 00000000..7eef19d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml new file mode 100644 index 00000000..e87d8d80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml new file mode 100644 index 00000000..6c258877 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml new file mode 100644 index 00000000..139f3ccc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml new file mode 100644 index 00000000..fc160a99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml new file mode 100644 index 00000000..ff6fa3d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml new file mode 100644 index 00000000..61f0df50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml new file mode 100644 index 00000000..1385b934 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml new file mode 100644 index 00000000..afe15d7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml new file mode 100644 index 00000000..221289f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function 
utils.process_college_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml new file mode 100644 index 00000000..863792b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml new file mode 100644 index 00000000..7971c606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml new file mode 100644 index 00000000..77a75ccf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_conceptual_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml new file mode 100644 index 00000000..bd98fc8d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml new file mode 100644 index 00000000..98e48a27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml new file mode 100644 index 00000000..eba4149c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml new file mode 100644 index 00000000..f1796059 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_formal_logic diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml new file mode 100644 index 00000000..96886181 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml new file mode 100644 index 00000000..93d94120 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml new file mode 100644 index 00000000..9ec56d5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml new file mode 100644 index 00000000..82d86aed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated 
by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml new file mode 100644 index 00000000..7fcdec0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml new file mode 100644 index 00000000..96268192 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml new file mode 100644 index 00000000..f826de3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fil_social_sciences_tasks +task: 
global_mmlu_full_fil_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml new file mode 100644 index 00000000..104a7088 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml new file mode 100644 index 00000000..1d499b4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml new file mode 100644 index 00000000..43fcc04d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_microeconomics +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml 
new file mode 100644 index 00000000..175f31ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml new file mode 100644 index 00000000..2fc2dd5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml new file mode 100644 index 00000000..0540d57c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml new file mode 100644 index 00000000..d0801af2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: 
global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml new file mode 100644 index 00000000..724b7ce8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml new file mode 100644 index 00000000..6c2c1141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml new file mode 100644 index 00000000..1672d5b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_human_sexuality diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml new file mode 100644 index 00000000..4c5da91c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml new file mode 100644 index 00000000..dea2b20b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml new file mode 100644 index 00000000..6a30c724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml new file mode 100644 index 00000000..d2a7062c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_machine_learning +tag: global_mmlu_full_fil_stem_tasks +task: global_mmlu_full_fil_machine_learning diff --git 
a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml new file mode 100644 index 00000000..1ea56835 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_management diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml new file mode 100644 index 00000000..82d4490a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml new file mode 100644 index 00000000..bdeb0984 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml new file mode 100644 index 00000000..51c56a3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_miscellaneous +tag: 
global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml new file mode 100644 index 00000000..53148a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml new file mode 100644 index 00000000..fb5fecf2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml new file mode 100644 index 00000000..35859dc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml new file mode 100644 index 00000000..dc2d414e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_philosophy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _fil_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml new file mode 100644 index 00000000..abf65fd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml new file mode 100644 index 00000000..04ce3436 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml new file mode 100644 index 00000000..e5694cf4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml new file mode 100644 index 
00000000..e7fd0446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml new file mode 100644 index 00000000..b9ce14aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml new file mode 100644 index 00000000..fdae5298 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml new file mode 100644 index 00000000..4a03eec1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fil_social_sciences_tasks +task: 
global_mmlu_full_fil_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml new file mode 100644 index 00000000..bc0ed052 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml new file mode 100644 index 00000000..ed40afb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fil_social_sciences_tasks +task: global_mmlu_full_fil_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml new file mode 100644 index 00000000..85ed4d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fil_other_tasks +task: global_mmlu_full_fil_virology diff --git a/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml new file mode 100644 index 00000000..3ee6bce1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/global_mmlu_full_fil_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fil_template_yaml 
+process_docs: !function utils.process_world_religions +tag: global_mmlu_full_fil_humanities_tasks +task: global_mmlu_full_fil_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fil/utils.py b/lm_eval/tasks/global_mmlu/full/fil/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fil/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + 
+globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml new file mode 100644 index 00000000..47ca7972 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml new file mode 100644 index 00000000..e85d6746 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_fr +task: + - global_mmlu_full_fr_stem + - global_mmlu_full_fr_other + - global_mmlu_full_fr_social_sciences + - global_mmlu_full_fr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml new file mode 100644 index 00000000..697e3a29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_humanities +task: + - global_mmlu_full_fr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml new file mode 100644 index 00000000..9b2ada6b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_other +task: + - global_mmlu_full_fr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml new file mode 100644 index 00000000..ac7e4605 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_social_sciences +task: + - global_mmlu_full_fr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml new file mode 100644 index 00000000..c81d601f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_fr_stem +task: + - global_mmlu_full_fr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml new file mode 100644 index 00000000..bf7d76c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml new file mode 100644 index 00000000..e9a96927 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml new file mode 100644 index 00000000..6e4ca5a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml new file mode 100644 index 00000000..df3c1fbd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml new file mode 100644 index 00000000..b0daa2e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml new file 
mode 100644 index 00000000..1e997578 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml new file mode 100644 index 00000000..9c1c3189 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml new file mode 100644 index 00000000..078108f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml new file mode 100644 index 00000000..bf2f2940 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml new file mode 100644 index 00000000..8c9ccc80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml new file mode 100644 index 00000000..01dcea37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml new file mode 100644 index 00000000..794f64be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml new file mode 100644 index 00000000..12c6afc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml new file mode 100644 index 00000000..1f33ddab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml new file mode 100644 index 00000000..8dcb0585 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml new file mode 100644 index 00000000..2658ce96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml new file mode 100644 index 00000000..5239cb1c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml new file mode 100644 index 00000000..2763dcb5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml new file mode 100644 index 00000000..2a6a26c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml new file mode 100644 index 00000000..6ffacc29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml new file mode 100644 index 00000000..d1720422 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml new file mode 100644 index 00000000..9788e7be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml new file mode 100644 index 00000000..3e2ff22e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2aba3b61 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..21fb1df5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml new file mode 100644 index 00000000..a975d1fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml new file mode 100644 index 00000000..ff654ff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml new file mode 100644 index 00000000..4038c956 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml new file mode 100644 index 00000000..a65da780 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml new file mode 100644 index 00000000..37c75136 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml new file mode 100644 index 00000000..a0e123f2 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml new file mode 100644 index 00000000..e2a9cf6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml new file mode 100644 index 00000000..b9e9ece9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml new file mode 100644 index 00000000..eac30d27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml new file mode 100644 index 00000000..2e15b0fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml new file mode 100644 index 00000000..f42079c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml new file mode 100644 index 00000000..68ebdb71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml new file mode 100644 index 00000000..25a8df3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_fr_stem_tasks +task: global_mmlu_full_fr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml new file mode 100644 index 00000000..73f7d869 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_management diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml new file mode 100644 index 00000000..8a19b83e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml new file mode 100644 index 00000000..d15774f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml new file mode 100644 index 00000000..d7519709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_fr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml new file mode 100644 index 00000000..f625921e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml new file mode 100644 index 00000000..4575ca04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml new file mode 100644 index 00000000..61521c91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml new file mode 100644 index 00000000..7f771c34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml new file mode 100644 index 00000000..3bcac0f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml new file mode 100644 index 00000000..a06a7af5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml new file mode 100644 index 00000000..2ecf2e8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml new file mode 100644 
index 00000000..983a2d38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml new file mode 100644 index 00000000..59d9aa30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml new file mode 100644 index 00000000..d84b7ad0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml new file mode 100644 index 00000000..fcd82b7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_fr_social_sciences_tasks +task: 
global_mmlu_full_fr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml new file mode 100644 index 00000000..ff7b8fd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml new file mode 100644 index 00000000..d92c2095 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_fr_social_sciences_tasks +task: global_mmlu_full_fr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml new file mode 100644 index 00000000..211c96a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_fr_other_tasks +task: global_mmlu_full_fr_virology diff --git a/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml new file mode 100644 index 00000000..f1f168ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/global_mmlu_full_fr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_fr_humanities_tasks +task: global_mmlu_full_fr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/fr/utils.py b/lm_eval/tasks/global_mmlu/full/fr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/fr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml new file mode 100644 index 00000000..08a958bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ha +task: + - global_mmlu_full_ha_stem + - global_mmlu_full_ha_other + - global_mmlu_full_ha_social_sciences + - global_mmlu_full_ha_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml new file mode 100644 index 00000000..84cce38d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_humanities +task: + - global_mmlu_full_ha_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml new file mode 100644 index 00000000..73a6ea0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_other +task: + - global_mmlu_full_ha_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml new file mode 100644 index 00000000..8b520a5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_social_sciences +task: + - global_mmlu_full_ha_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml new file mode 100644 index 00000000..6213d280 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ha_stem +task: + - global_mmlu_full_ha_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml new file mode 100644 index 00000000..8521fe50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/_ha_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ha +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml new file mode 100644 index 00000000..62ad5e7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml new file mode 100644 index 00000000..2ead0f6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ha_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml new file mode 100644 index 00000000..1616398f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml new file mode 100644 index 00000000..c1719b0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml new file mode 100644 index 00000000..dcef5e27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml new file mode 100644 index 00000000..f2825694 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml new file mode 100644 index 00000000..73a422e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml new file mode 100644 index 00000000..7bdb65c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml new file mode 100644 index 00000000..47e5326c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml new file mode 100644 index 00000000..9065f085 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml new file mode 100644 index 00000000..40aa11c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml new file mode 100644 index 00000000..38d1e9c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml new file mode 100644 index 00000000..7326514a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml new file mode 100644 index 00000000..e865b6bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml new file mode 100644 index 00000000..9457d1bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml new file mode 100644 index 00000000..e04fb1ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml new file mode 100644 index 00000000..03c9cbac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml new file mode 100644 index 00000000..db104be5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml new file mode 100644 index 00000000..729fed2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml new file mode 100644 index 00000000..13f5621b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml new file mode 100644 index 00000000..1914e1fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml new file mode 100644 index 00000000..fa878b03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml new file mode 100644 index 00000000..10a13674 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml new file mode 100644 index 00000000..eebac409 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0a22ab84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml new file mode 100644 index 00000000..fc681f90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml new file mode 100644 index 00000000..81bb343c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml new file mode 100644 index 00000000..2bc4cc4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml new file mode 100644 index 00000000..c5d46e5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml new file mode 100644 index 00000000..4848cc31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml new file mode 100644 index 00000000..7a22c79a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml new file mode 100644 index 00000000..13882279 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml new file mode 100644 index 00000000..51ff436b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml new file mode 100644 index 00000000..1a36fb86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml new file mode 100644 index 00000000..f1c9cc1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml new file mode 100644 index 00000000..4bc1314b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml new file mode 100644 index 00000000..259534b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml new file mode 100644 index 00000000..c94a073b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ha_stem_tasks +task: global_mmlu_full_ha_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml new file mode 100644 index 00000000..666d4720 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_management diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml new file mode 100644 index 00000000..9528a1f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml new file mode 100644 index 00000000..92f0a408 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml new file mode 100644 index 00000000..fc97a8dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ha_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml new file mode 100644 index 00000000..dbcf96c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml new file mode 100644 index 00000000..aa7b4266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml new file mode 100644 index 00000000..b413e4be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml new file mode 100644 index 00000000..118e4801 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml new file mode 100644 index 00000000..a310d023 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml new file mode 100644 index 00000000..79536ddc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml new file mode 100644 index 00000000..613170da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml new file mode 100644 
index 00000000..bd65c233 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml new file mode 100644 index 00000000..cf7ecb1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml new file mode 100644 index 00000000..c9cba53f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml new file mode 100644 index 00000000..fe767686 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ha_social_sciences_tasks +task: 
global_mmlu_full_ha_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml new file mode 100644 index 00000000..94f8e311 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml new file mode 100644 index 00000000..54f82b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ha_social_sciences_tasks +task: global_mmlu_full_ha_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml new file mode 100644 index 00000000..ce7d224d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ha_other_tasks +task: global_mmlu_full_ha_virology diff --git a/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml new file mode 100644 index 00000000..67a6d33d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/global_mmlu_full_ha_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ha_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ha_humanities_tasks +task: global_mmlu_full_ha_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ha/utils.py b/lm_eval/tasks/global_mmlu/full/ha/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ha/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml new file mode 100644 index 00000000..ff0a5e8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_he +task: + - global_mmlu_full_he_stem + - global_mmlu_full_he_other + - global_mmlu_full_he_social_sciences + - global_mmlu_full_he_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml new file mode 100644 index 00000000..678ee0d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_humanities +task: + - global_mmlu_full_he_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml new file mode 100644 index 00000000..c99b4806 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_other +task: + - global_mmlu_full_he_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml new file mode 100644 index 00000000..12906895 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_social_sciences +task: + - global_mmlu_full_he_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml new file mode 100644 index 00000000..f6e76e7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_he_stem +task: + - global_mmlu_full_he_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml new file mode 100644 index 00000000..b6ec9fc8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/_he_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: he +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml new file mode 100644 index 00000000..fb197c3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml new file mode 100644 index 00000000..3ab9ee20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _he_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml new file mode 100644 index 00000000..8950b1e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml new file mode 100644 index 00000000..8c114348 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml new file mode 100644 index 00000000..1324a04d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml new file mode 100644 index 00000000..cecddc60 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml new file mode 100644 index 00000000..2c0f8b5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml new file mode 100644 index 00000000..b4c36a41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml new file mode 100644 index 00000000..3633d537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml new file mode 100644 index 00000000..a28c592e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml new file mode 100644 index 00000000..3893b9aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml new file mode 100644 index 00000000..4167874e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml new file mode 100644 index 00000000..2ee92851 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml new file mode 100644 index 00000000..9ceb3277 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml new file mode 100644 index 00000000..00658e28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml new file mode 100644 index 00000000..10f2ac18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml new file mode 100644 index 00000000..aac3f8da --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml new file mode 100644 index 00000000..299a73ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml new file mode 100644 index 00000000..9d3ba893 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml new file mode 100644 index 00000000..f67f8ef3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml new file mode 100644 index 00000000..7ca8b6f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml new file mode 100644 index 00000000..58e4081a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml new file mode 100644 index 00000000..2d76e387 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3745110 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fa0b7c71 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml new file mode 100644 index 00000000..7f78a5c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml new file mode 100644 index 00000000..15be9243 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml new file mode 100644 index 00000000..6f309c0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml new file mode 100644 index 00000000..1ae831c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml new file mode 100644 index 00000000..3a2e8170 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml new file mode 100644 index 00000000..c05da45a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml new file mode 100644 index 00000000..b818e4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml new file mode 100644 index 00000000..49f7ce5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml new file mode 100644 index 00000000..91d08567 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml new file mode 100644 index 00000000..1bedb4f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_international_law diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml new file mode 100644 index 00000000..39fe15a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml new file mode 100644 index 00000000..e54b58b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml new file mode 100644 index 00000000..8190e96a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_he_stem_tasks +task: global_mmlu_full_he_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml new file mode 100644 index 00000000..d5811f80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_management diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml new file mode 100644 index 00000000..7fe44232 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_marketing diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml new file mode 100644 index 00000000..8c9082c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml new file mode 100644 index 00000000..bc419dee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_he_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml new file mode 100644 index 00000000..d889642b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml new file mode 100644 index 00000000..11554823 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml new file mode 100644 index 00000000..30d49701 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml new file mode 100644 index 00000000..458632de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml new file mode 100644 index 00000000..93835673 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml new file mode 100644 index 00000000..aed28636 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml new file mode 100644 index 00000000..38a9e3cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml new file mode 100644 
index 00000000..e8ca950c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml new file mode 100644 index 00000000..f82c2892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml new file mode 100644 index 00000000..e3aff661 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml new file mode 100644 index 00000000..e99aa015 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_he_social_sciences_tasks +task: 
global_mmlu_full_he_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml new file mode 100644 index 00000000..de81b92c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_sociology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml new file mode 100644 index 00000000..7be65044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_he_social_sciences_tasks +task: global_mmlu_full_he_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml new file mode 100644 index 00000000..b6f51e1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_he_other_tasks +task: global_mmlu_full_he_virology diff --git a/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml new file mode 100644 index 00000000..e3d10a0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/global_mmlu_full_he_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _he_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_he_humanities_tasks +task: global_mmlu_full_he_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/he/utils.py b/lm_eval/tasks/global_mmlu/full/he/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/he/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml new file mode 100644 index 00000000..ed54a6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_hi +task: + - global_mmlu_full_hi_stem + - global_mmlu_full_hi_other + - global_mmlu_full_hi_social_sciences + - global_mmlu_full_hi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml new file mode 100644 index 00000000..36492fa3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_humanities +task: + - global_mmlu_full_hi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml new file mode 100644 index 00000000..08dc16b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_other +task: + - global_mmlu_full_hi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml new file mode 100644 index 00000000..0a4dfdd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_social_sciences +task: + - global_mmlu_full_hi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml new file mode 100644 index 00000000..7a0123ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_hi_stem +task: + - global_mmlu_full_hi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml new file mode 100644 index 00000000..18c6286e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml new file mode 100644 index 00000000..f239f067 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml new file mode 100644 index 00000000..dfcd776e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _hi_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml new file mode 100644 index 00000000..dbb6763d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml new file mode 100644 index 00000000..5882427e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml new file mode 100644 index 00000000..7b0c6c3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml new file mode 100644 index 00000000..d5326c8d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml new file mode 100644 index 00000000..bf9e2130 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml new file mode 100644 index 00000000..c79f4250 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml new file mode 100644 index 00000000..4e8b0427 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml new file mode 100644 index 00000000..7e8c0df2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml new file mode 100644 index 00000000..5fe337ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml new file mode 100644 index 00000000..029a02e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml new file mode 100644 index 00000000..a6748974 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml new file mode 100644 index 00000000..355053b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml new file mode 100644 index 00000000..04dca10d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml new file mode 100644 index 00000000..ca7a3083 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml new file mode 100644 index 00000000..ae534fa6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml new file mode 100644 index 00000000..096fd58b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml new file mode 100644 index 00000000..9ef04ee5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml new file mode 100644 index 00000000..4e8913e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml new file mode 100644 index 00000000..180eef75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml new file mode 100644 index 00000000..32abd63b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml new file mode 100644 index 00000000..1089908b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml new file mode 100644 index 00000000..fb22bb51 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml new file mode 100644 index 00000000..affc27c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml new file mode 100644 index 00000000..59f97c94 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml new file mode 100644 index 00000000..a7506a4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml new file mode 100644 index 00000000..406035bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml new file mode 100644 index 00000000..f5c2be37 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml new file mode 100644 index 00000000..a955febe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml new file mode 100644 index 00000000..6a5573f8 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml new file mode 100644 index 00000000..38ce4680 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml new file mode 100644 index 00000000..2486301f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml new file mode 100644 index 00000000..8f889885 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml new file mode 100644 index 00000000..2cb0d834 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml new file mode 100644 index 00000000..11329130 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml new file mode 100644 index 00000000..e22cedbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml new file mode 100644 index 00000000..134ab080 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_hi_stem_tasks +task: global_mmlu_full_hi_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml new file mode 100644 index 00000000..e523b5d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_management diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml new file mode 100644 index 00000000..11d8930b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml new file mode 100644 index 00000000..ad38e3e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml new file mode 100644 index 00000000..c31f8883 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_hi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml new file mode 100644 index 00000000..01145f6f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml new file mode 100644 index 00000000..4acbb127 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml new file mode 100644 index 00000000..889c0018 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml new file mode 100644 index 00000000..2a8aeb4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml new file mode 100644 index 00000000..ad80a3c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml new file mode 100644 index 00000000..1f547789 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml new file mode 100644 index 00000000..836d577d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml new file mode 100644 
index 00000000..7a8e7db9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml new file mode 100644 index 00000000..b4ebc1a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml new file mode 100644 index 00000000..7bbf959c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml new file mode 100644 index 00000000..7faa9d43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_hi_social_sciences_tasks +task: 
global_mmlu_full_hi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml new file mode 100644 index 00000000..b0ca49ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml new file mode 100644 index 00000000..d5fd9f0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_hi_social_sciences_tasks +task: global_mmlu_full_hi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml new file mode 100644 index 00000000..843ea254 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_hi_other_tasks +task: global_mmlu_full_hi_virology diff --git a/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml new file mode 100644 index 00000000..f5e56ce0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/global_mmlu_full_hi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_hi_humanities_tasks +task: global_mmlu_full_hi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/hi/utils.py b/lm_eval/tasks/global_mmlu/full/hi/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/hi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml new file mode 100644 index 00000000..f678660e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_id +task: + - global_mmlu_full_id_stem + - global_mmlu_full_id_other + - global_mmlu_full_id_social_sciences + - global_mmlu_full_id_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml new file mode 100644 index 00000000..b9283f55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_humanities +task: + - global_mmlu_full_id_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml new file mode 100644 index 00000000..74de0f36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_other +task: + - global_mmlu_full_id_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml new file mode 100644 index 00000000..b8656b6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_social_sciences +task: + - global_mmlu_full_id_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml new file mode 100644 index 00000000..d0e47276 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_id_stem +task: + - global_mmlu_full_id_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml new file mode 100644 index 00000000..32d9dc92 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml new file mode 100644 index 00000000..b18c1cd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml new file mode 100644 index 00000000..65b83d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _id_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml new file mode 100644 index 00000000..11f1047c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml new file mode 100644 index 00000000..9ed992f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml new file mode 100644 index 00000000..8baa424f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml new file mode 100644 index 00000000..67b9c935 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml new file mode 100644 index 00000000..3eb5d228 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml new file mode 100644 index 00000000..1462945b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml new file mode 100644 index 00000000..98062792 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml new file mode 100644 index 00000000..1a2736e8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml new file mode 100644 index 00000000..bb88c3f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml new file mode 100644 index 00000000..9764ac3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml new file mode 100644 index 00000000..c70c111c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml new file mode 100644 index 00000000..7f82a74b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml new file mode 100644 index 00000000..3cc2dfba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml new file mode 100644 index 00000000..1d511b4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml new file mode 100644 index 00000000..7c6cef13 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml new file mode 100644 index 00000000..5e7a44da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml new file mode 100644 index 00000000..d39c31ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml new file mode 100644 index 00000000..d92d827a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml new file mode 100644 index 00000000..ff714ac8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml new file mode 100644 index 00000000..0d5c8141 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml new file mode 100644 index 00000000..1ad392b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml new file mode 100644 index 00000000..850d6d82 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml new file mode 100644 index 00000000..c1fda5c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml new file mode 100644 index 00000000..8a628ed9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml new file mode 100644 index 00000000..f2c44707 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml new file mode 100644 index 00000000..75888a3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml new file mode 100644 index 00000000..8a6ff54b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml new file mode 100644 index 00000000..ab205802 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml new file mode 100644 index 00000000..8dee8c31 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml new file mode 100644 index 00000000..5474c8ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml new file mode 100644 index 00000000..464ac67f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml new file mode 100644 index 00000000..518cb30c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml new file mode 100644 index 00000000..90262ada --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_international_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml new file mode 100644 index 00000000..8727ab49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml new file mode 100644 index 00000000..da2c8e6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml new file mode 100644 index 00000000..84a30d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_id_stem_tasks +task: global_mmlu_full_id_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml new file mode 100644 index 00000000..fdd340bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_management diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml new file mode 100644 index 00000000..caf3eb0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_marketing diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml new file mode 100644 index 00000000..0d649fd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml new file mode 100644 index 00000000..0811f1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_id_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml new file mode 100644 index 00000000..0a124ded --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml new file mode 100644 index 00000000..65dfaea7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml new file mode 100644 index 00000000..804ffc60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml new file mode 100644 index 00000000..88b37de8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml new file mode 100644 index 00000000..1e851c49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml new file mode 100644 index 00000000..d45c9517 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml new file mode 100644 index 00000000..965cbad6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml new file mode 100644 
index 00000000..fdd02d53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml new file mode 100644 index 00000000..b8d294b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml new file mode 100644 index 00000000..8f772b0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml new file mode 100644 index 00000000..1a73d36b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_id_social_sciences_tasks +task: 
global_mmlu_full_id_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml new file mode 100644 index 00000000..715e5c31 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_sociology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml new file mode 100644 index 00000000..59e147ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_id_social_sciences_tasks +task: global_mmlu_full_id_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml new file mode 100644 index 00000000..50225ab5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_id_other_tasks +task: global_mmlu_full_id_virology diff --git a/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml new file mode 100644 index 00000000..0193d12d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/global_mmlu_full_id_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_id_humanities_tasks +task: global_mmlu_full_id_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/id/utils.py b/lm_eval/tasks/global_mmlu/full/id/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/id/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml new file mode 100644 index 00000000..a263e295 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ig +task: + - global_mmlu_full_ig_stem + - global_mmlu_full_ig_other + - global_mmlu_full_ig_social_sciences + - global_mmlu_full_ig_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml new file mode 100644 index 00000000..6c6ffb61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_humanities +task: + - global_mmlu_full_ig_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml new file mode 100644 index 00000000..214efed2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_other +task: + - global_mmlu_full_ig_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml new file mode 100644 index 00000000..e27fe1fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_social_sciences +task: + - global_mmlu_full_ig_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml new file mode 100644 index 00000000..5dd33b62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ig_stem +task: + - global_mmlu_full_ig_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml new file mode 100644 index 00000000..0832c633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/_ig_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ig +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml new file mode 100644 index 00000000..1dbf6c83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml new file mode 100644 index 00000000..8dc198c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ig_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml new file mode 100644 index 00000000..078069eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml new file mode 100644 index 00000000..f075e740 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml new file mode 100644 index 00000000..d41779ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml new file mode 100644 index 00000000..5f0e5705 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml new file mode 100644 index 00000000..78e25dc8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml new file mode 100644 index 00000000..d9894a45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml new file mode 100644 index 00000000..8976041f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml new file mode 100644 index 00000000..5edaf0d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml new file mode 100644 index 00000000..e55c01cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml new file mode 100644 index 00000000..5ee7564c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml new file mode 100644 index 00000000..555d4fa8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml new file mode 100644 index 00000000..783804b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml new file mode 100644 index 00000000..789f95d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml new file mode 100644 index 00000000..7a5c9d2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml new file mode 100644 index 00000000..8f9e426c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml new file mode 100644 index 00000000..d9b7955c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml new file mode 100644 index 00000000..368bc71d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml new file mode 100644 index 00000000..1ce77e10 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml new file mode 100644 index 00000000..d859f390 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml new file mode 100644 index 00000000..29a93f46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml new file mode 100644 index 00000000..74194a44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml new file mode 100644 index 00000000..cd53504d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml new file mode 100644 index 00000000..30244a64 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml new file mode 100644 index 00000000..737c0a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml new file mode 100644 index 00000000..c5a2220c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml new file mode 100644 index 00000000..a7d4c537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml new file mode 100644 index 00000000..d3051f01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml new file mode 100644 index 00000000..d4841032 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml new file mode 100644 index 00000000..61e124fc --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml new file mode 100644 index 00000000..3d83a63d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml new file mode 100644 index 00000000..787e3151 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml new file mode 100644 index 00000000..5c618459 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml new file mode 100644 index 00000000..3a8511d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml new file mode 100644 index 00000000..46254ea1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml new file mode 100644 index 00000000..2bce7502 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml new file mode 100644 index 00000000..93c87fbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ig_stem_tasks +task: global_mmlu_full_ig_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml new file mode 100644 index 00000000..780e1c89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_management diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml new file mode 100644 index 00000000..2d30ece9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml new file mode 100644 index 00000000..cac197c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml new file mode 100644 index 00000000..a3824510 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ig_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml new file mode 100644 index 00000000..cc545d84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml new file mode 100644 index 00000000..60ad22fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml new file mode 100644 index 00000000..3cc55607 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml new file mode 100644 index 00000000..3f655632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml new file mode 100644 index 00000000..db4affcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml new file mode 100644 index 00000000..18d35773 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml new file mode 100644 index 00000000..e9db41d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ig_humanities_tasks +task: global_mmlu_full_ig_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml new file mode 100644 
index 00000000..7fa28b60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml new file mode 100644 index 00000000..639be381 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml new file mode 100644 index 00000000..d31af09f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml new file mode 100644 index 00000000..200db46b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ig_social_sciences_tasks +task: 
global_mmlu_full_ig_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml new file mode 100644 index 00000000..65a3e4e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml new file mode 100644 index 00000000..ff0b0505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ig_social_sciences_tasks +task: global_mmlu_full_ig_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml new file mode 100644 index 00000000..b437c82f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ig_other_tasks +task: global_mmlu_full_ig_virology diff --git a/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml new file mode 100644 index 00000000..6fbc7cfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ig/global_mmlu_full_ig_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ig_template_yaml +process_docs: !function 
from functools import partial


# Subject identifiers for which a ``process_<subject>`` hook is generated
# below; the per-subject task YAMLs refer to those hooks by name
# (``!function utils.process_<subject>``).
SUBJECTS = [
    "abstract_algebra",
    "anatomy",
    "astronomy",
    "business_ethics",
    "clinical_knowledge",
    "college_biology",
    "college_chemistry",
    "college_computer_science",
    "college_mathematics",
    "college_medicine",
    "college_physics",
    "computer_security",
    "conceptual_physics",
    "econometrics",
    "electrical_engineering",
    "elementary_mathematics",
    "formal_logic",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_computer_science",
    "high_school_european_history",
    "high_school_geography",
    "high_school_government_and_politics",
    "high_school_macroeconomics",
    "high_school_mathematics",
    "high_school_microeconomics",
    "high_school_physics",
    "high_school_psychology",
    "high_school_statistics",
    "high_school_us_history",
    "high_school_world_history",
    "human_aging",
    "human_sexuality",
    "international_law",
    "jurisprudence",
    "logical_fallacies",
    "machine_learning",
    "management",
    "marketing",
    "medical_genetics",
    "miscellaneous",
    "moral_disputes",
    "moral_scenarios",
    "nutrition",
    "philosophy",
    "prehistory",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_studies",
    "sociology",
    "us_foreign_policy",
    "virology",
    "world_religions",
]


def process_docs(dataset, subject):
    """Return the rows of *dataset* whose ``"subject"`` field equals *subject*.

    *dataset* only has to expose a ``filter`` method that accepts a row
    predicate; whatever that call produces is returned unchanged.
    """

    def _belongs_to_subject(row):
        return row["subject"] == subject

    return dataset.filter(_belongs_to_subject)


# One pre-bound filter per subject, keyed by the hook name ``process_<subject>``.
process_functions = dict(
    (f"process_{subject}", partial(process_docs, subject=subject))
    for subject in SUBJECTS
)

# Publish every hook as a module-level attribute so it can be looked up by
# name on this module.
globals().update(process_functions)
a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml new file mode 100644 index 00000000..dabb8987 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_it +task: + - global_mmlu_full_it_stem + - global_mmlu_full_it_other + - global_mmlu_full_it_social_sciences + - global_mmlu_full_it_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml new file mode 100644 index 00000000..3d072ccc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_humanities +task: + - global_mmlu_full_it_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml new file mode 100644 index 00000000..99fe18cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_other +task: + - global_mmlu_full_it_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml new file mode 100644 index 00000000..15a457a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_social_sciences +task: + - global_mmlu_full_it_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml new file mode 100644 index 00000000..cf7a555d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_it_stem +task: + - global_mmlu_full_it_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml new file mode 100644 index 00000000..4798e10a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml new file mode 100644 index 00000000..f7351c1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml new file mode 100644 index 00000000..436cd3f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _it_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml new file mode 100644 index 00000000..f98f0f20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml new file mode 100644 index 00000000..d9d931fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml new file mode 100644 index 00000000..fe429024 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml new file mode 100644 index 00000000..71b8f45e --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml new file mode 100644 index 00000000..d29bd758 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml new file mode 100644 index 00000000..f740d259 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml new file mode 100644 index 00000000..7568fb7e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml new file mode 100644 index 00000000..9bfc5ac1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml new file mode 100644 index 00000000..2101847e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml new file mode 100644 index 00000000..70b31f9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml new file mode 100644 index 00000000..d8917d40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml new file mode 100644 index 00000000..a49352fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml new file mode 100644 index 00000000..27f0c6c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml new file mode 100644 index 00000000..fd78a52e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml new file mode 100644 index 00000000..8171fcf1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml new file mode 100644 index 00000000..a952ed44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml new file mode 100644 index 00000000..939ba752 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml new file mode 100644 index 00000000..4524d4dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml new file mode 100644 index 00000000..2dfb1649 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml new file mode 100644 index 00000000..556aaf20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml new file mode 100644 index 00000000..3c1d5b60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml new file mode 100644 index 00000000..a35b6bac --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml new file mode 100644 index 00000000..74c01ccd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml new file mode 100644 index 00000000..6bec02c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml new file mode 100644 index 00000000..551a0f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml new file mode 100644 index 00000000..3cf7144b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml new file mode 100644 index 00000000..17088e51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml new file mode 100644 index 00000000..f3f35f99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml new file mode 100644 index 00000000..af222877 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml new file mode 100644 index 00000000..698ddb5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml new file mode 100644 index 00000000..6ff49730 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml new file mode 100644 index 00000000..58d32fa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml new file mode 100644 index 00000000..d7c47e55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_international_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml new file mode 100644 index 00000000..e100c0e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml new file mode 100644 index 00000000..a07444a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml new file mode 100644 index 00000000..bfd3b7a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_it_stem_tasks +task: global_mmlu_full_it_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml new file mode 100644 index 00000000..5b5feeac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_management diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml new file mode 100644 index 00000000..d50b46f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_marketing diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml new file mode 100644 index 00000000..1b02316c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml new file mode 100644 index 00000000..b638b50d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_it_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml new file mode 100644 index 00000000..520a8bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml new file mode 100644 index 00000000..abfc7395 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml new file mode 100644 index 00000000..cac74152 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml new file mode 100644 index 00000000..a1d94976 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml new file mode 100644 index 00000000..74bdec82 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml new file mode 100644 index 00000000..acf999a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml new file mode 100644 index 00000000..1ec4f58e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml new file mode 100644 
index 00000000..8b53cdac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml new file mode 100644 index 00000000..9b7a24f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml new file mode 100644 index 00000000..727cf4b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml new file mode 100644 index 00000000..90fd186c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_it_social_sciences_tasks +task: 
global_mmlu_full_it_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml new file mode 100644 index 00000000..dbc77935 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_sociology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml new file mode 100644 index 00000000..e2a923b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_it_social_sciences_tasks +task: global_mmlu_full_it_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml new file mode 100644 index 00000000..72758a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_it_other_tasks +task: global_mmlu_full_it_virology diff --git a/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml new file mode 100644 index 00000000..e4491c51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/global_mmlu_full_it_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_it_humanities_tasks +task: global_mmlu_full_it_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/it/utils.py b/lm_eval/tasks/global_mmlu/full/it/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/it/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml new file mode 100644 index 00000000..103460d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ja +task: + - global_mmlu_full_ja_stem + - global_mmlu_full_ja_other + - global_mmlu_full_ja_social_sciences + - global_mmlu_full_ja_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml new file mode 100644 index 00000000..a063eb0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_humanities +task: + - global_mmlu_full_ja_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml new file mode 100644 index 00000000..1f9b95ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_other +task: + - global_mmlu_full_ja_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml new file mode 100644 index 00000000..4207fea4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_social_sciences +task: + - global_mmlu_full_ja_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml new file mode 100644 index 00000000..7ca6ed1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ja_stem +task: + - global_mmlu_full_ja_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml new file mode 100644 index 00000000..591725e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml new file mode 100644 index 00000000..4b65a75b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml new file mode 100644 index 00000000..e735aa34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ja_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml new file mode 100644 index 00000000..a0ba8947 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml new file mode 100644 index 00000000..c39d286c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml new file mode 100644 index 00000000..27d09b88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml new file mode 100644 index 00000000..15e26a51 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml new file mode 100644 index 00000000..52c92423 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml new file mode 100644 index 00000000..a91a7d61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml new file mode 100644 index 00000000..67dcd2a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml new file mode 100644 index 00000000..c55ab2a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml new file mode 100644 index 00000000..5413c86d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml new file mode 100644 index 00000000..276f214e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml new file mode 100644 index 00000000..f823ac44 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml new file mode 100644 index 00000000..dbc6846a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml new file mode 100644 index 00000000..ba729575 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml new file mode 100644 index 00000000..13807104 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml new file mode 100644 index 00000000..d88d5685 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml new file mode 100644 index 00000000..64cb2b9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml new file mode 100644 index 00000000..b88adf90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml new file mode 100644 index 00000000..eef67cc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml new file mode 100644 index 00000000..c90e5fb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml new file mode 100644 index 00000000..8318099a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml new file mode 100644 index 00000000..4ed5a620 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml new file mode 100644 index 00000000..6ec0ab84 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4bdd9555 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml new file mode 100644 index 00000000..3abfd81b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml new file mode 100644 index 00000000..483161a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml new file mode 100644 index 00000000..702092af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml new file mode 100644 index 00000000..1b7ce92e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml new file mode 100644 index 00000000..c68acb8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml new file mode 100644 index 00000000..b1b91833 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml new file mode 100644 index 00000000..2dee1f89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml new file mode 100644 index 00000000..3612a7ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml new file mode 100644 index 00000000..b70204fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml new file mode 100644 index 00000000..77ed3c97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml new file mode 100644 index 00000000..f8fbb261 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml new file mode 100644 index 00000000..58d4afcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml new file mode 100644 index 00000000..e664390f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ja_stem_tasks +task: global_mmlu_full_ja_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml new file mode 100644 index 00000000..cf495ae2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_management diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml new file mode 100644 index 00000000..1349771e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml new file mode 100644 index 00000000..1b513ac4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml new file mode 100644 index 00000000..81659bf7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ja_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml new file mode 100644 index 00000000..2e77694b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml new file mode 100644 index 00000000..f322376d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml new file mode 100644 index 00000000..1d58fb0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml new file mode 100644 index 00000000..23865361 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml new file mode 100644 index 00000000..a044bf99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml new file mode 100644 index 00000000..b828e0e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml new file mode 100644 index 00000000..7aafb6c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml new file mode 100644 
index 00000000..b0cf9905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml new file mode 100644 index 00000000..e5ef36c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml new file mode 100644 index 00000000..565439e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml new file mode 100644 index 00000000..f7d21bd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ja_social_sciences_tasks +task: 
global_mmlu_full_ja_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml new file mode 100644 index 00000000..5cc44c1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml new file mode 100644 index 00000000..8ebdb14a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ja_social_sciences_tasks +task: global_mmlu_full_ja_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml new file mode 100644 index 00000000..d6f83367 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ja_other_tasks +task: global_mmlu_full_ja_virology diff --git a/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml new file mode 100644 index 00000000..23e66e06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/global_mmlu_full_ja_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ja_humanities_tasks +task: global_mmlu_full_ja_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ja/utils.py b/lm_eval/tasks/global_mmlu/full/ja/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ja/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml new file mode 100644 index 00000000..d2225e23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ko +task: + - global_mmlu_full_ko_stem + - global_mmlu_full_ko_other + - global_mmlu_full_ko_social_sciences + - global_mmlu_full_ko_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml new file mode 100644 index 00000000..c7690643 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_humanities +task: + - global_mmlu_full_ko_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml new file mode 100644 index 00000000..8990ae95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_other +task: + - global_mmlu_full_ko_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml new file mode 100644 index 00000000..0bbfad7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_social_sciences +task: + - global_mmlu_full_ko_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml new file mode 100644 index 00000000..18b7f17b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ko_stem +task: + - global_mmlu_full_ko_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml new file mode 100644 index 00000000..11700a26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml new file mode 100644 index 00000000..5959d788 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml new file mode 100644 index 00000000..ebb90860 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ko_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml new file mode 100644 index 00000000..670846b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml new file mode 100644 index 00000000..1a44e430 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml new file mode 100644 index 00000000..e9e29697 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml new file mode 100644 index 00000000..fc364468 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml new file mode 100644 index 00000000..2eb0f416 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml new file mode 100644 index 00000000..044f1eff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml new file mode 100644 index 00000000..9929097c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml new file mode 100644 index 00000000..b78c24e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml new file mode 100644 index 00000000..20c3fb20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml new file mode 100644 index 00000000..1f954572 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml new file mode 100644 index 00000000..f7998975 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml new file mode 100644 index 00000000..79c35ed7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml new file mode 100644 index 00000000..1444a249 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml new file mode 100644 index 00000000..8bec91b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml new file mode 100644 index 00000000..1cf31092 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml new file mode 100644 index 00000000..2a5f7bd5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml new file mode 100644 index 00000000..bdaed574 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml new file mode 100644 index 00000000..193a064c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml new file mode 100644 index 00000000..2d2ad648 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml new file mode 100644 index 00000000..a48b602d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml new file mode 100644 index 00000000..cc9c20eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e86a27fa --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml new file mode 100644 index 00000000..4b947f7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml new file mode 100644 index 00000000..9184ad9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml new file mode 100644 index 00000000..50b6a150 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml new file mode 100644 index 00000000..974e3b03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml new file mode 100644 index 00000000..e617e8cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml new file mode 100644 index 00000000..1a010596 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml new file mode 100644 index 00000000..a696675d --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml new file mode 100644 index 00000000..eca86cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml new file mode 100644 index 00000000..69e3a2df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml new file mode 100644 index 00000000..ed3e99fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml new file mode 100644 index 00000000..651f389c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml new file mode 100644 index 00000000..001807eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml new file mode 100644 index 00000000..01eec477 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml new file mode 100644 index 00000000..c1126c6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ko_stem_tasks +task: global_mmlu_full_ko_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml new file mode 100644 index 00000000..3b833270 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_management diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml new file mode 100644 index 00000000..3cce25c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml new file mode 100644 index 00000000..65df1786 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml new file mode 100644 index 00000000..04b71e2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ko_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml new file mode 100644 index 00000000..3f1e7fa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml new file mode 100644 index 00000000..c657543a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml new file mode 100644 index 00000000..dff6450f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml new file mode 100644 index 00000000..21f058af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml new file mode 100644 index 00000000..56aedae9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml new file mode 100644 index 00000000..24f83b23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml new file mode 100644 index 00000000..ece9dc5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml new file mode 100644 
index 00000000..43930957 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml new file mode 100644 index 00000000..98ff6520 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml new file mode 100644 index 00000000..1a5b07f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml new file mode 100644 index 00000000..3663391a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ko_social_sciences_tasks +task: 
global_mmlu_full_ko_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml new file mode 100644 index 00000000..902b4443 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml new file mode 100644 index 00000000..36e1794c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ko_social_sciences_tasks +task: global_mmlu_full_ko_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml new file mode 100644 index 00000000..64b58d6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ko_other_tasks +task: global_mmlu_full_ko_virology diff --git a/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml new file mode 100644 index 00000000..7289671f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/global_mmlu_full_ko_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ko_humanities_tasks +task: global_mmlu_full_ko_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ko/utils.py b/lm_eval/tasks/global_mmlu/full/ko/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ko/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml new file mode 100644 index 00000000..4774599a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ky +task: + - global_mmlu_full_ky_stem + - global_mmlu_full_ky_other + - global_mmlu_full_ky_social_sciences + - global_mmlu_full_ky_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml new file mode 100644 index 00000000..1e0368c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_humanities +task: + - global_mmlu_full_ky_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml new file mode 100644 index 00000000..1bfc89ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_other +task: + - global_mmlu_full_ky_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml new file mode 100644 index 00000000..3ae756c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_social_sciences +task: + - global_mmlu_full_ky_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml new file mode 100644 index 00000000..817456fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ky_stem +task: + - global_mmlu_full_ky_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml new file mode 100644 index 00000000..63f88823 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/_ky_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ky +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml new file mode 100644 index 00000000..21338a56 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml new file mode 100644 index 00000000..df263548 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ky_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml new file mode 100644 index 00000000..5e0f6aba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml new file mode 100644 index 00000000..17656dc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml new file mode 100644 index 00000000..8c053b88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml new file mode 100644 index 00000000..36492106 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml new file mode 100644 index 00000000..cb9f8586 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml new file mode 100644 index 00000000..e4b15b54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml new file mode 100644 index 00000000..f5657b66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml new file mode 100644 index 00000000..f1e0c25e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml new file mode 100644 index 00000000..fac1d80f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml new file mode 100644 index 00000000..e35718d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml new file mode 100644 index 00000000..f165ec61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml new file mode 100644 index 00000000..48670c7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml new file mode 100644 index 00000000..29d24142 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml new file mode 100644 index 00000000..9aa7f81b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml new file mode 100644 index 00000000..70a5bd86 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml new file mode 100644 index 00000000..f678c0d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml new file mode 100644 index 00000000..750bc68b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml new file mode 100644 index 00000000..7700e37f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml new file mode 100644 index 00000000..c805fc4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml new file mode 100644 index 00000000..01c67f8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml new file mode 100644 index 00000000..ccc5c8b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml new file mode 100644 index 00000000..02ea66ef --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f693296d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml new file mode 100644 index 00000000..b05e2799 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml new file mode 100644 index 00000000..d596290f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml new file mode 100644 index 00000000..3f71865c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml new file mode 100644 index 00000000..635873a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml new file mode 100644 index 00000000..df8cfefb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml new file mode 100644 index 00000000..3c75f534 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml new file mode 100644 index 00000000..e73edcbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml new file mode 100644 index 00000000..f4e662a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml new file mode 100644 index 00000000..8c2556da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml new file mode 100644 index 00000000..2af16190 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml new file mode 100644 index 00000000..f0994cc2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml new file mode 100644 index 00000000..f7933a77 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml new file mode 100644 index 00000000..f6e525a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ky_stem_tasks +task: global_mmlu_full_ky_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml new file mode 100644 index 00000000..03f70aa0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_management diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml new file mode 100644 index 00000000..72ced798 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml new file mode 100644 index 00000000..371e4b21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml new file mode 100644 index 00000000..e693ab8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ky_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml new file mode 100644 index 00000000..ccafcb1e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml new file mode 100644 index 00000000..16c19b29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml new file mode 100644 index 00000000..f6c00cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml new file mode 100644 index 00000000..6d6d242b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml new file mode 100644 index 00000000..4ff2e08d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml new file mode 100644 index 00000000..37c6a892 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml new file mode 100644 index 00000000..9b4fea0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml new file mode 100644 
index 00000000..8a6ef0b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml new file mode 100644 index 00000000..dce1b6d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml new file mode 100644 index 00000000..168cae74 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml new file mode 100644 index 00000000..1e24b816 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ky_social_sciences_tasks +task: 
global_mmlu_full_ky_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml new file mode 100644 index 00000000..7d1ad959 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml new file mode 100644 index 00000000..36cd7e20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ky_social_sciences_tasks +task: global_mmlu_full_ky_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml new file mode 100644 index 00000000..e2a77915 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ky_other_tasks +task: global_mmlu_full_ky_virology diff --git a/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml new file mode 100644 index 00000000..563c1397 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/global_mmlu_full_ky_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ky_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ky_humanities_tasks +task: global_mmlu_full_ky_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ky/utils.py b/lm_eval/tasks/global_mmlu/full/ky/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ky/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml new file mode 100644 index 00000000..93929d42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_lt +task: + - global_mmlu_full_lt_stem + - global_mmlu_full_lt_other + - global_mmlu_full_lt_social_sciences + - global_mmlu_full_lt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml new file mode 100644 index 00000000..48ad351f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_humanities +task: + - global_mmlu_full_lt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml new file mode 100644 index 00000000..8f63c35a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_other +task: + - global_mmlu_full_lt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml new file mode 100644 index 00000000..9ababd6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_social_sciences +task: + - global_mmlu_full_lt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml new file mode 100644 index 00000000..1a59e683 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_lt_stem +task: + - global_mmlu_full_lt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml new file mode 100644 index 00000000..8b925338 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/_lt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: lt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml new file mode 100644 index 00000000..76b96844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml new file mode 100644 index 00000000..527c7107 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _lt_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml new file mode 100644 index 00000000..419b89e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml new file mode 100644 index 00000000..c51daa22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml new file mode 100644 index 00000000..e0232774 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml new file mode 100644 index 00000000..c6fea6f8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml new file mode 100644 index 00000000..93b9a561 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml new file mode 100644 index 00000000..8d0dcfdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml new file mode 100644 index 00000000..8d33b747 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml new file mode 100644 index 00000000..ad74dbb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml new file mode 100644 index 00000000..3c69754b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml new file mode 100644 index 00000000..d78f3a54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml new file mode 100644 index 00000000..3e7b5e49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml new file mode 100644 index 00000000..6d0085ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml new file mode 100644 index 00000000..284dfe9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml new file mode 100644 index 00000000..7e9a0103 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml new file mode 100644 index 00000000..ec9a665b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml new file mode 100644 index 00000000..d81a9470 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml new file mode 100644 index 00000000..139376cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml new file mode 100644 index 00000000..87112d8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml new file mode 100644 index 00000000..2324bb28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml new file mode 100644 index 00000000..5f365fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml new file mode 100644 index 00000000..e3a6f921 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml new file mode 100644 index 00000000..526b68ed --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml new file mode 100644 index 00000000..e14b1dce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml new file mode 100644 index 00000000..1cdf5c90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml new file mode 100644 index 00000000..a2f2a210 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml new file mode 100644 index 00000000..bd363709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml new file mode 100644 index 00000000..aad65a13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml new file mode 100644 index 00000000..6dd6d699 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml new file mode 100644 index 00000000..5fb0ee1e --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml new file mode 100644 index 00000000..75f2769a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml new file mode 100644 index 00000000..beb27e9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml new file mode 100644 index 00000000..c9d952c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml new file mode 100644 index 00000000..f77adf9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml new file mode 100644 index 00000000..e6be84fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml new file mode 100644 index 00000000..ad597b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml new file mode 100644 index 00000000..eb06a871 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_lt_stem_tasks +task: global_mmlu_full_lt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml new file mode 100644 index 00000000..e1885ad3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_management diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml new file mode 100644 index 00000000..2dc83089 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml new file mode 100644 index 00000000..b67d321e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml new file mode 100644 index 00000000..2c744613 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_lt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml new file mode 100644 index 00000000..09e6f044 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml new file mode 100644 index 00000000..bb8dd330 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml new file mode 100644 index 00000000..0b1a8556 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml new file mode 100644 index 00000000..aab1d556 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml new file mode 100644 index 00000000..ac93dd6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml new file mode 100644 index 00000000..6be78ec3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml new file mode 100644 index 00000000..60b6cdcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml new file mode 100644 
index 00000000..dd899676 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml new file mode 100644 index 00000000..bd796e8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml new file mode 100644 index 00000000..3c6e5f39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml new file mode 100644 index 00000000..9eb9957d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_lt_social_sciences_tasks +task: 
global_mmlu_full_lt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml new file mode 100644 index 00000000..2e17f95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml new file mode 100644 index 00000000..d39bb63c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_lt_social_sciences_tasks +task: global_mmlu_full_lt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml new file mode 100644 index 00000000..b8482a61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_lt_other_tasks +task: global_mmlu_full_lt_virology diff --git a/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml new file mode 100644 index 00000000..a86af60d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/global_mmlu_full_lt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _lt_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_lt_humanities_tasks +task: global_mmlu_full_lt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/lt/utils.py b/lm_eval/tasks/global_mmlu/full/lt/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/lt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml new file mode 100644 index 00000000..05b55948 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_mg +task: + - global_mmlu_full_mg_stem + - global_mmlu_full_mg_other + - global_mmlu_full_mg_social_sciences + - global_mmlu_full_mg_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml new file mode 100644 index 00000000..76b08f6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_humanities +task: + - global_mmlu_full_mg_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml new file mode 100644 index 00000000..0006af4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_other +task: + - global_mmlu_full_mg_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml new file mode 100644 index 00000000..9cfe4f5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_social_sciences +task: + - global_mmlu_full_mg_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml new file mode 100644 index 00000000..bdc719d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_mg_stem +task: + - global_mmlu_full_mg_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml new file mode 100644 index 00000000..4aa97b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/_mg_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: mg +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml new file mode 100644 index 00000000..bea850ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml new file mode 100644 index 00000000..1cf6c116 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _mg_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml new file mode 100644 index 00000000..df582b27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml new file mode 100644 index 00000000..a6351342 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml new file mode 100644 index 00000000..21003af5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml new file mode 100644 index 00000000..d305ca94 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml new file mode 100644 index 00000000..7ccaffb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml new file mode 100644 index 00000000..248f72c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml new file mode 100644 index 00000000..fb817aae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml new file mode 100644 index 00000000..4fdbee03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml new file mode 100644 index 00000000..493bdf87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml new file mode 100644 index 00000000..c5054eb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml new file mode 100644 index 00000000..44a13a70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml new file mode 100644 index 00000000..2c5d029a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml new file mode 100644 index 00000000..1e5ece33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml new file mode 100644 index 00000000..4d62c758 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml new file mode 100644 index 00000000..e5dc67d0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml new file mode 100644 index 00000000..2712e9b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml new file mode 100644 index 00000000..c58957e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml new file mode 100644 index 00000000..707b7356 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml new file mode 100644 index 00000000..d7afd5a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml new file mode 100644 index 00000000..b6391ee4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml new file mode 100644 index 00000000..eb7014a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml new file mode 100644 index 00000000..74c5fc18 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml new file mode 100644 index 00000000..24631ff3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml new file mode 100644 index 00000000..b9db4a0c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml new file mode 100644 index 00000000..f321b06a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml new file mode 100644 index 00000000..bc25971e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml new file mode 100644 index 00000000..42cc39a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml new file mode 100644 index 00000000..08cf8671 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml new file mode 100644 index 00000000..87314a57 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml new file mode 100644 index 00000000..c341a243 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml new file mode 100644 index 00000000..15375f9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml new file mode 100644 index 00000000..21419b9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml new file mode 100644 index 00000000..9d481339 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_international_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml new file mode 100644 index 00000000..f083a0ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml new file mode 100644 index 00000000..57e2e731 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml new file mode 100644 index 00000000..7609a09f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_mg_stem_tasks +task: global_mmlu_full_mg_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml new file mode 100644 index 00000000..becfe4b3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_management diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml new file mode 100644 index 00000000..3765002b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_marketing diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml new file mode 100644 index 00000000..3f023ccd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml new file mode 100644 index 00000000..2993999d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_mg_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml new file mode 100644 index 00000000..fd430a0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml new file mode 100644 index 00000000..c1b16e86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml new file mode 100644 index 00000000..ab471f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml new file mode 100644 index 00000000..f598830e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml new file mode 100644 index 00000000..330f1f52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml new file mode 100644 index 00000000..694118d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml new file mode 100644 index 00000000..fb6df92a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml new file mode 100644 
index 00000000..1de72b6b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml new file mode 100644 index 00000000..f922e162 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml new file mode 100644 index 00000000..c829b89d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml new file mode 100644 index 00000000..362b4dbd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_mg_social_sciences_tasks +task: 
global_mmlu_full_mg_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml new file mode 100644 index 00000000..f0638cdb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_sociology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml new file mode 100644 index 00000000..8ead541a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_mg_social_sciences_tasks +task: global_mmlu_full_mg_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml new file mode 100644 index 00000000..1ca09027 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_mg_other_tasks +task: global_mmlu_full_mg_virology diff --git a/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml new file mode 100644 index 00000000..2bb64d70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/global_mmlu_full_mg_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _mg_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_mg_humanities_tasks +task: global_mmlu_full_mg_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/mg/utils.py b/lm_eval/tasks/global_mmlu/full/mg/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/mg/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml new file mode 100644 index 00000000..e5a13645 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ms +task: + - global_mmlu_full_ms_stem + - global_mmlu_full_ms_other + - global_mmlu_full_ms_social_sciences + - global_mmlu_full_ms_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml new file mode 100644 index 00000000..0641187b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_humanities +task: + - global_mmlu_full_ms_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml new file mode 100644 index 00000000..3d14420c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_other +task: + - global_mmlu_full_ms_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml new file mode 100644 index 00000000..3db339d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_social_sciences +task: + - global_mmlu_full_ms_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml new file mode 100644 index 00000000..68908e16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ms_stem +task: + - global_mmlu_full_ms_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml new file mode 100644 index 00000000..ba750264 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/_ms_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ms +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml new file mode 100644 index 00000000..ec791f2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml new file mode 100644 index 00000000..35038bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ms_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml new file mode 100644 index 00000000..79fdcbdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml new file mode 100644 index 00000000..ffd6195a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml new file mode 100644 index 00000000..4c69b82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml new file mode 100644 index 00000000..58219479 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml new file mode 100644 index 00000000..35514b83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml new file mode 100644 index 00000000..5e242b8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml new file mode 100644 index 00000000..07e10799 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml new file mode 100644 index 00000000..82822217 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml new file mode 100644 index 00000000..be20fa6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml new file mode 100644 index 00000000..2e886b50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml new file mode 100644 index 00000000..2a2fb6da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml new file mode 100644 index 00000000..efdffabf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml new file mode 100644 index 00000000..80eba2e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml new file mode 100644 index 00000000..1e6caf26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml new file mode 100644 index 00000000..59147662 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml new file mode 100644 index 00000000..6ac76cad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml new file mode 100644 index 00000000..6be8ccfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml new file mode 100644 index 00000000..f01c29b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml new file mode 100644 index 00000000..b18e8cf8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml new file mode 100644 index 00000000..fdb41802 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml new file mode 100644 index 00000000..c4e44a60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml new file mode 100644 index 00000000..0ebbfe6f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f28f9a5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml new file mode 100644 index 00000000..50a2552d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml new file mode 100644 index 00000000..6747cd9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml new file mode 100644 index 00000000..aef3fee8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml new file mode 100644 index 00000000..3e8641e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml new file mode 100644 index 00000000..4aa7ba00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml new file mode 100644 index 00000000..e6d1faab --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml new file mode 100644 index 00000000..4caf7e54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml new file mode 100644 index 00000000..5b2b5c5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml new file mode 100644 index 00000000..2ddef17a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml new file mode 100644 index 00000000..61795f58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml new file mode 100644 index 00000000..f2e96706 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml new file mode 100644 index 00000000..1d142bde --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml new file mode 100644 index 00000000..94724056 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ms_stem_tasks +task: global_mmlu_full_ms_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml new file mode 100644 index 00000000..8ca04a13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_management diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml new file mode 100644 index 00000000..ec0e4462 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml new file mode 100644 index 00000000..0f2b1eec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml new file mode 100644 index 00000000..65da952e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ms_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml new file mode 100644 index 00000000..399035f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml new file mode 100644 index 00000000..3bc74baa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml new file mode 100644 index 00000000..300de677 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml new file mode 100644 index 00000000..8f6eceae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml new file mode 100644 index 00000000..4c624fec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml new file mode 100644 index 00000000..9a06e7f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml new file mode 100644 index 00000000..b3d5921a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml new file mode 100644 
index 00000000..0d9a58b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml new file mode 100644 index 00000000..7f51baec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml new file mode 100644 index 00000000..c07cbdee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml new file mode 100644 index 00000000..651cb72d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ms_social_sciences_tasks +task: 
global_mmlu_full_ms_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml new file mode 100644 index 00000000..5aeb7efa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml new file mode 100644 index 00000000..ecbf5705 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ms_social_sciences_tasks +task: global_mmlu_full_ms_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml new file mode 100644 index 00000000..fbdd5e25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ms_other_tasks +task: global_mmlu_full_ms_virology diff --git a/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml new file mode 100644 index 00000000..32b35029 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/global_mmlu_full_ms_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ms_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ms_humanities_tasks +task: global_mmlu_full_ms_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ms/utils.py b/lm_eval/tasks/global_mmlu/full/ms/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ms/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml new file mode 100644 index 00000000..ec13a0be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ne +task: + - global_mmlu_full_ne_stem + - global_mmlu_full_ne_other + - global_mmlu_full_ne_social_sciences + - global_mmlu_full_ne_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml new file mode 100644 index 00000000..fef749db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_humanities +task: + - global_mmlu_full_ne_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml new file mode 100644 index 00000000..0d3dfbd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_other +task: + - global_mmlu_full_ne_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml new file mode 100644 index 00000000..f1f09f00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_social_sciences +task: + - global_mmlu_full_ne_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml new file mode 100644 index 00000000..eebc1cac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ne_stem +task: + - global_mmlu_full_ne_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml new file mode 100644 index 00000000..25f8daec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/_ne_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ne +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml new file mode 100644 index 00000000..48bf7bb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml new file mode 100644 index 00000000..0f66f8ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ne_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml new file mode 100644 index 00000000..a02aaf30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml new file mode 100644 index 00000000..d87f5b98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml new file mode 100644 index 00000000..f27eb4e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml new file mode 100644 index 00000000..d26edef8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml new file mode 100644 index 00000000..88b8bd86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml new file mode 100644 index 00000000..51909ffc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml new file mode 100644 index 00000000..40b9cb79 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml new file mode 100644 index 00000000..81f81f84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml new file mode 100644 index 00000000..09798c09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml new file mode 100644 index 00000000..49d89dd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml new file mode 100644 index 00000000..94bfec4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml new file mode 100644 index 00000000..81d6ed98 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml new file mode 100644 index 00000000..73ad1a34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml new file mode 100644 index 00000000..cbc3bacd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml new file mode 100644 index 00000000..225da2fb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml new file mode 100644 index 00000000..6f5e9f1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml new file mode 100644 index 00000000..a8c0436a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml new file mode 100644 index 00000000..405661c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml new file mode 100644 index 00000000..6cff5ba6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml new file mode 100644 index 00000000..4f7eb3ee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml new file mode 100644 index 00000000..0453e51d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml new file mode 100644 index 00000000..05710100 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fd68d5f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml new file mode 100644 index 00000000..39ef0a58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml new file mode 100644 index 00000000..535a3918 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml new file mode 100644 index 00000000..f355dad2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml new file mode 100644 index 00000000..a52d7a01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml new file mode 100644 index 00000000..5a256420 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml new file mode 100644 index 00000000..9e1199b1 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml new file mode 100644 index 00000000..afc2135b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml new file mode 100644 index 00000000..18450534 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml new file mode 100644 index 00000000..7d23b839 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml new file mode 100644 index 00000000..5be599d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml new file mode 100644 index 00000000..180a397c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml new file mode 100644 index 00000000..3aa369a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml new file mode 100644 index 00000000..4e08abda --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ne_stem_tasks +task: global_mmlu_full_ne_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml new file mode 100644 index 00000000..e44c5be6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_management diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml new file mode 100644 index 00000000..10f7daa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml new file mode 100644 index 00000000..8139b1f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml new file mode 100644 index 00000000..cb1bf905 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ne_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml new file mode 100644 index 00000000..1b74fb36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml new file mode 100644 index 00000000..91f8f06c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml new file mode 100644 index 00000000..575f0e45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml new file mode 100644 index 00000000..95fdd0eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml new file mode 100644 index 00000000..e6e5c706 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml new file mode 100644 index 00000000..718cedee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml new file mode 100644 index 00000000..89c70160 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml new file mode 100644 
index 00000000..a366e0c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml new file mode 100644 index 00000000..649e5343 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml new file mode 100644 index 00000000..37f2ddea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml new file mode 100644 index 00000000..55f80904 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ne_social_sciences_tasks +task: 
global_mmlu_full_ne_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml new file mode 100644 index 00000000..78161d5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml new file mode 100644 index 00000000..c38f59c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ne_social_sciences_tasks +task: global_mmlu_full_ne_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml new file mode 100644 index 00000000..0c15808f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ne_other_tasks +task: global_mmlu_full_ne_virology diff --git a/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml new file mode 100644 index 00000000..5c6163f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/global_mmlu_full_ne_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ne_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ne_humanities_tasks +task: global_mmlu_full_ne_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ne/utils.py b/lm_eval/tasks/global_mmlu/full/ne/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ne/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml new file mode 100644 index 00000000..44f562da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_nl +task: + - global_mmlu_full_nl_stem + - global_mmlu_full_nl_other + - global_mmlu_full_nl_social_sciences + - global_mmlu_full_nl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml new file mode 100644 index 00000000..656a421b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_humanities +task: + - global_mmlu_full_nl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml new file mode 100644 index 00000000..23a42201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_other +task: + - global_mmlu_full_nl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml new file mode 100644 index 00000000..afba5678 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_social_sciences +task: + - global_mmlu_full_nl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml new file mode 100644 index 00000000..9658b13e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_nl_stem +task: + - global_mmlu_full_nl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml new file mode 100644 index 00000000..39efbfd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/_nl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: nl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml new file mode 100644 index 00000000..458a3614 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml new file mode 100644 index 00000000..e4cbd90e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _nl_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml new file mode 100644 index 00000000..84cdf578 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml new file mode 100644 index 00000000..f75776f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml new file mode 100644 index 00000000..6e963d0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml new file mode 100644 index 00000000..e4a3660b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml new file mode 100644 index 00000000..fa9faed9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml new file mode 100644 index 00000000..b603c309 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml new file mode 100644 index 00000000..f55207ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml new file mode 100644 index 00000000..5cdda1b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml new file mode 100644 index 00000000..26d70230 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml new file mode 100644 index 00000000..01a8a747 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml new file mode 100644 index 00000000..cccd2666 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml new file mode 100644 index 00000000..22ad59bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml new file mode 100644 index 00000000..3aca226f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml new file mode 100644 index 00000000..2118a1d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml new file mode 100644 index 00000000..5fd86105 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml new file mode 100644 index 00000000..d7147d51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml new file mode 100644 index 00000000..271b54f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml new file mode 100644 index 00000000..921abd17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml new file mode 100644 index 00000000..ea190bea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml new file mode 100644 index 00000000..c348d482 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml new file mode 100644 index 00000000..de31a63b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml new file mode 100644 index 00000000..bc0e3cb1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml new file mode 100644 index 00000000..2e221c68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml new file mode 100644 index 00000000..137158a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml new file mode 100644 index 00000000..27b426c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml new file mode 100644 index 00000000..746df49e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml new file mode 100644 index 00000000..89cb42d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml new file mode 100644 index 00000000..e27082c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml new file mode 100644 index 00000000..66efc58c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml new file mode 100644 index 00000000..83b65345 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml new file mode 100644 index 00000000..82e00b4b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml new file mode 100644 index 00000000..468589da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml new file mode 100644 index 00000000..e5bf62a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml new file mode 100644 index 00000000..7b533613 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml new file mode 100644 index 00000000..de862b66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml new file mode 100644 index 00000000..c205af00 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_nl_stem_tasks +task: global_mmlu_full_nl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml new file mode 100644 index 00000000..5b624af8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_management diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml new file mode 100644 index 00000000..81658e9f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml new file mode 100644 index 00000000..f8e52c0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml new file mode 100644 index 00000000..31af482e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_nl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml new file mode 100644 index 00000000..853de0c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml new file mode 100644 index 00000000..8b86e045 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml new file mode 100644 index 00000000..96036dae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml new file mode 100644 index 00000000..84e827dd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml new file mode 100644 index 00000000..f49c8a5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml new file mode 100644 index 00000000..45484116 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml new file mode 100644 index 00000000..17b28cd3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml new file mode 100644 
index 00000000..f4db01bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml new file mode 100644 index 00000000..be586b45 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml new file mode 100644 index 00000000..2ffe5848 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml new file mode 100644 index 00000000..b6c76948 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_nl_social_sciences_tasks +task: 
global_mmlu_full_nl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml new file mode 100644 index 00000000..983e13cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml new file mode 100644 index 00000000..bd6b6227 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_nl_social_sciences_tasks +task: global_mmlu_full_nl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml new file mode 100644 index 00000000..92d1973b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_nl_other_tasks +task: global_mmlu_full_nl_virology diff --git a/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml new file mode 100644 index 00000000..a8c2ecca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/global_mmlu_full_nl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _nl_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_nl_humanities_tasks +task: global_mmlu_full_nl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/nl/utils.py b/lm_eval/tasks/global_mmlu/full/nl/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/nl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml new file mode 100644 index 00000000..c325bf1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ny +task: + - global_mmlu_full_ny_stem + - global_mmlu_full_ny_other + - global_mmlu_full_ny_social_sciences + - global_mmlu_full_ny_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml new file mode 100644 index 00000000..89e7618f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_humanities +task: + - global_mmlu_full_ny_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml new file mode 100644 index 00000000..51b90446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_other +task: + - global_mmlu_full_ny_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml new file mode 100644 index 00000000..b711dfdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_social_sciences +task: + - global_mmlu_full_ny_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml new file mode 100644 index 00000000..99bf9d95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ny_stem +task: + - global_mmlu_full_ny_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml new file mode 100644 index 00000000..069a9446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/_ny_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ny +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml new file mode 100644 index 00000000..2e3d7c33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml new file mode 100644 index 00000000..60806afc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ny_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml new file mode 100644 index 00000000..afbcb482 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml new file mode 100644 index 00000000..6f8981bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml new file mode 100644 index 00000000..ff44dd67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml new file mode 100644 index 00000000..da5ce370 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml new file mode 100644 index 00000000..d62bce83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml new file mode 100644 index 00000000..48cd98d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml new file mode 100644 index 00000000..ed77ba9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml new file mode 100644 index 00000000..9cd8aa2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml new file mode 100644 index 00000000..66d5dc27 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml new file mode 100644 index 00000000..8a9dae62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml new file mode 100644 index 00000000..8d160ffc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml new file mode 100644 index 00000000..88af709a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml new file mode 100644 index 00000000..d835f1e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml new file mode 100644 index 00000000..558ffd0b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml new file mode 100644 index 00000000..cce0df19 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml new file mode 100644 index 00000000..6ce027a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml new file mode 100644 index 00000000..a729008d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml new file mode 100644 index 00000000..79771bfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml new file mode 100644 index 00000000..6889806f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml new file mode 100644 index 00000000..29e6e4a5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml new file mode 100644 index 00000000..447db75f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e543cf76 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml new file mode 100644 index 00000000..61c49e75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml new file mode 100644 index 00000000..db228d02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml new file mode 100644 index 00000000..62d87c86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml new file mode 100644 index 00000000..54c15d66 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml new file mode 100644 index 00000000..4f7d8b5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml new file mode 100644 index 00000000..f53235b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml new file mode 100644 index 00000000..1d413b98 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml new file mode 100644 index 00000000..4adf2e8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml new file mode 100644 index 00000000..9660b7b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml new file mode 100644 index 00000000..11a6f2d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml new file mode 100644 index 00000000..9a46ff6a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml new file mode 100644 index 00000000..e4606df5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml new file mode 100644 index 00000000..6edade03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml new file mode 100644 index 00000000..765b2201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ny_stem_tasks +task: global_mmlu_full_ny_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml new file mode 100644 index 00000000..a699a70d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_management diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml new file mode 100644 index 00000000..596d6937 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml new file mode 100644 index 00000000..4fae66a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml new file mode 100644 index 00000000..8555e173 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ny_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml new file mode 100644 index 00000000..b64f4d9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml new file mode 100644 index 00000000..c73f9f1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml new file mode 100644 index 00000000..456f4cb6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml new file mode 100644 index 00000000..d0e0e05e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml new file mode 100644 index 00000000..d65c6be1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml new file mode 100644 index 00000000..c152c80e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml new file mode 100644 index 00000000..d5e2c7b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml new file mode 100644 
index 00000000..cacd5df7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml new file mode 100644 index 00000000..ffdd86d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml new file mode 100644 index 00000000..0e6b5ab8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml new file mode 100644 index 00000000..f894fdd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ny_social_sciences_tasks +task: 
global_mmlu_full_ny_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml new file mode 100644 index 00000000..1d2d0cd4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml new file mode 100644 index 00000000..a72a237d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ny_social_sciences_tasks +task: global_mmlu_full_ny_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml new file mode 100644 index 00000000..9eeb7cf0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ny_other_tasks +task: global_mmlu_full_ny_virology diff --git a/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml new file mode 100644 index 00000000..a1c243c8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/global_mmlu_full_ny_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ny_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ny_humanities_tasks +task: global_mmlu_full_ny_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ny/utils.py b/lm_eval/tasks/global_mmlu/full/ny/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ny/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml new file mode 100644 index 00000000..2476fd33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pl +task: + - global_mmlu_full_pl_stem + - global_mmlu_full_pl_other + - global_mmlu_full_pl_social_sciences + - global_mmlu_full_pl_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml new file mode 100644 index 00000000..4b5f7aa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_humanities +task: + - global_mmlu_full_pl_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml new file mode 100644 index 00000000..241dbc1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_other +task: + - global_mmlu_full_pl_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml new file mode 100644 index 00000000..9a50a315 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_social_sciences +task: + - global_mmlu_full_pl_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml new file mode 100644 index 00000000..3d11c89f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pl_stem +task: + - global_mmlu_full_pl_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml new file mode 100644 index 00000000..af8809dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/_pl_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pl +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml new file mode 100644 index 00000000..37f611a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml new file mode 100644 index 00000000..c274bce1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _pl_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml new file mode 100644 index 00000000..99220f0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml new file mode 100644 index 00000000..10592668 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml new file mode 100644 index 00000000..29a4fadc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml new file mode 100644 index 00000000..cce1671c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml new file mode 100644 index 00000000..79c63530 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml new file mode 100644 index 00000000..bb630140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml new file mode 100644 index 00000000..6b42f767 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml new file mode 100644 index 00000000..43bea976 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml new file mode 100644 index 00000000..0c9ea601 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml new file mode 100644 index 00000000..365b60a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml new file mode 100644 index 00000000..2b9437e3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml new file mode 100644 index 00000000..648f24c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml new file mode 100644 index 00000000..196de258 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml new file mode 100644 index 00000000..8646b6a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml new file mode 100644 index 00000000..2d13d283 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml new file mode 100644 index 00000000..15bb640b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml new file mode 100644 index 00000000..ba964028 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml new file mode 100644 index 00000000..7f142dd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml new file mode 100644 index 00000000..99b3b9da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml new file mode 100644 index 00000000..e99b2fb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml new file mode 100644 index 00000000..bc6113f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml new file mode 100644 index 00000000..05a7de9b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml new file mode 100644 index 00000000..aceda633 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml new file mode 100644 index 00000000..6eef2cd8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml new file mode 100644 index 00000000..5adb5fa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml new file mode 100644 index 00000000..fbda7920 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml new file mode 100644 index 00000000..7eb09362 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml new file mode 100644 index 00000000..b7beef5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml new file mode 100644 index 00000000..08f45dd9 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml new file mode 100644 index 00000000..99664de8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml new file mode 100644 index 00000000..d63f6f8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml new file mode 100644 index 00000000..8080ca8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml new file mode 100644 index 00000000..425695c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml new file mode 100644 index 00000000..a6455bd7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml new file mode 100644 index 00000000..f1359b3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml new file mode 100644 index 00000000..3d7bb0dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_pl_stem_tasks +task: global_mmlu_full_pl_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml new file mode 100644 index 00000000..f695226c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_management diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml new file mode 100644 index 00000000..7fedcd3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml new file mode 100644 index 00000000..89da9f67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml new file mode 100644 index 00000000..6f34762c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_pl_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml new file mode 100644 index 00000000..25f201f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml new file mode 100644 index 00000000..fd08e6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml new file mode 100644 index 00000000..b61f1f17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml new file mode 100644 index 00000000..8c1bf6dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml new file mode 100644 index 00000000..e5329e13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml new file mode 100644 index 00000000..514b04cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml new file mode 100644 index 00000000..99c719f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml new file mode 100644 
index 00000000..1dfafb25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml new file mode 100644 index 00000000..5b6181c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml new file mode 100644 index 00000000..acf874db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml new file mode 100644 index 00000000..d754904c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pl_social_sciences_tasks +task: 
global_mmlu_full_pl_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml new file mode 100644 index 00000000..4bc0fd8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml new file mode 100644 index 00000000..ef719be1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pl_social_sciences_tasks +task: global_mmlu_full_pl_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml new file mode 100644 index 00000000..f9084c13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pl_other_tasks +task: global_mmlu_full_pl_virology diff --git a/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml new file mode 100644 index 00000000..036d0f4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/global_mmlu_full_pl_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pl_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_pl_humanities_tasks +task: global_mmlu_full_pl_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pl/utils.py b/lm_eval/tasks/global_mmlu/full/pl/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pl/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml new file mode 100644 index 00000000..ac79bda1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_pt +task: + - global_mmlu_full_pt_stem + - global_mmlu_full_pt_other + - global_mmlu_full_pt_social_sciences + - global_mmlu_full_pt_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml new file mode 100644 index 00000000..261a7028 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_humanities +task: + - global_mmlu_full_pt_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml new file mode 100644 index 00000000..a61b12f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_other +task: + - global_mmlu_full_pt_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml new file mode 100644 index 00000000..2c04bf5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_social_sciences +task: + - global_mmlu_full_pt_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml new file mode 100644 index 00000000..dc3d3610 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_pt_stem +task: + - global_mmlu_full_pt_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml new file mode 100644 index 00000000..66ba2417 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml new file mode 100644 index 00000000..d9efd817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml new file mode 100644 index 00000000..45390503 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _pt_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml new file mode 100644 index 00000000..90880cd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml new file mode 100644 index 00000000..f18ef2d8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml new file mode 100644 index 00000000..2999a02a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml new file mode 100644 index 00000000..0cf0a61b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml new file mode 100644 index 00000000..91d8cd2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml new file mode 100644 index 00000000..68592aaf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml new file mode 100644 index 00000000..31d7f6af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml new file mode 100644 index 00000000..46ec8232 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml new file mode 100644 index 00000000..2cf6402d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml new file mode 100644 index 00000000..0953a105 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml new file mode 100644 index 00000000..0e6e91a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml new file mode 100644 index 00000000..67c29915 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml new file mode 100644 index 00000000..5a6ba82e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml new file mode 100644 index 00000000..3d66a664 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml new file mode 100644 index 00000000..683d6ddd --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml new file mode 100644 index 00000000..e4396542 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml new file mode 100644 index 00000000..89fefd1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml new file mode 100644 index 00000000..ea323d8a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml new file mode 100644 index 00000000..5f8f0082 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml new file mode 100644 index 00000000..bef7a316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml new file mode 100644 index 00000000..e69c2978 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml new file mode 100644 index 00000000..e3fa920d --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml new file mode 100644 index 00000000..6b7ca2f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml new file mode 100644 index 00000000..4713674d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml new file mode 100644 index 00000000..d6475e99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml new file mode 100644 index 00000000..9eaed31a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml new file mode 100644 index 00000000..d09e1eb9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml new file mode 100644 index 00000000..3d8c1447 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml new file mode 100644 index 00000000..a883b438 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml new file mode 100644 index 00000000..6ea1454e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml new file mode 100644 index 00000000..34033c55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml new file mode 100644 index 00000000..bf961c33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml new file mode 100644 index 00000000..5247fc9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_international_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml new file mode 100644 index 00000000..07e78da5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml new file mode 100644 index 00000000..c2451399 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml new file mode 100644 index 00000000..79c577ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_pt_stem_tasks +task: global_mmlu_full_pt_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml new file mode 100644 index 00000000..a344b1c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_management diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml new file mode 100644 index 00000000..eeff36b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_marketing diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml new file mode 100644 index 00000000..27985380 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml new file mode 100644 index 00000000..e2fa1da1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_pt_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml new file mode 100644 index 00000000..e83d186e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml new file mode 100644 index 00000000..3529a15c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml new file mode 100644 index 00000000..e51eefe0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml new file mode 100644 index 00000000..ec0826b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml new file mode 100644 index 00000000..324dfe69 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml new file mode 100644 index 00000000..530c918e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml new file mode 100644 index 00000000..f7a3679c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml new file mode 100644 
index 00000000..0f4cc006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml new file mode 100644 index 00000000..4c5884c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml new file mode 100644 index 00000000..bb2d6536 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml new file mode 100644 index 00000000..1af8d662 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_pt_social_sciences_tasks +task: 
global_mmlu_full_pt_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml new file mode 100644 index 00000000..3ef8fcb7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_sociology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml new file mode 100644 index 00000000..8b48f528 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_pt_social_sciences_tasks +task: global_mmlu_full_pt_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml new file mode 100644 index 00000000..4b0de753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_pt_other_tasks +task: global_mmlu_full_pt_virology diff --git a/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml new file mode 100644 index 00000000..79648586 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/global_mmlu_full_pt_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_pt_humanities_tasks +task: global_mmlu_full_pt_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/pt/utils.py b/lm_eval/tasks/global_mmlu/full/pt/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/pt/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml new file mode 100644 index 00000000..b3aa5f49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ro +task: + - global_mmlu_full_ro_stem + - global_mmlu_full_ro_other + - global_mmlu_full_ro_social_sciences + - global_mmlu_full_ro_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml new file mode 100644 index 00000000..d54268b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_humanities +task: + - global_mmlu_full_ro_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml new file mode 100644 index 00000000..4e58aea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_other +task: + - global_mmlu_full_ro_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml new file mode 100644 index 00000000..e1cb84a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_social_sciences +task: + - global_mmlu_full_ro_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml new file mode 100644 index 00000000..de0e406f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ro_stem +task: + - global_mmlu_full_ro_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml new file mode 100644 index 00000000..e5cb6dd0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/_ro_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ro +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml new file mode 100644 index 00000000..c505fb8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml new file mode 100644 index 00000000..0c13018c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ro_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml new file mode 100644 index 00000000..9f4caefb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml new file mode 100644 index 00000000..1c1387fd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml new file mode 100644 index 00000000..b9e0dbb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml new file mode 100644 index 00000000..5bf14ab0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml new file mode 100644 index 00000000..59034744 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml new file mode 100644 index 00000000..6bb64c2e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml new file mode 100644 index 00000000..d719a5ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml new file mode 100644 index 00000000..c9284a8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml new file mode 100644 index 00000000..1d27d843 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml new file mode 100644 index 00000000..1d63556e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml new file mode 100644 index 00000000..25f30a36 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml new file mode 100644 index 00000000..1fa6b5d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml new file mode 100644 index 00000000..f6eb4b6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml new file mode 100644 index 00000000..e99772e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml new file mode 100644 index 00000000..be99bd00 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml new file mode 100644 index 00000000..819937e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml new file mode 100644 index 00000000..d7509581 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml new file mode 100644 index 00000000..d089583f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml new file mode 100644 index 00000000..46d5f472 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml new file mode 100644 index 00000000..1a1ae7e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml new file mode 100644 index 00000000..92935be5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml new file mode 100644 index 00000000..efd2a03f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fe2f97d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml new file mode 100644 index 00000000..f0432a01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml new file mode 100644 index 00000000..507fab86 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml new file mode 100644 index 00000000..19a76707 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml new file mode 100644 index 00000000..d27fc262 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml new file mode 100644 index 00000000..8f8023bc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml new file mode 100644 index 00000000..acc5fc41 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml new file mode 100644 index 00000000..9ea7c933 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml new file mode 100644 index 00000000..6b984c55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml new file mode 100644 index 00000000..e2af2cbe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml new file mode 100644 index 00000000..1cbf3d03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml new file mode 100644 index 00000000..d0acaca0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml new file mode 100644 index 00000000..c84234a0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml new file mode 100644 index 00000000..09237c9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ro_stem_tasks +task: global_mmlu_full_ro_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml new file mode 100644 index 00000000..fcb3f485 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_management diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml new file mode 100644 index 00000000..33b486c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml new file mode 100644 index 00000000..09c3d5e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml new file mode 100644 index 00000000..e744e1e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ro_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml new file mode 100644 index 00000000..4e6d4ed7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml new file mode 100644 index 00000000..d0e99149 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml new file mode 100644 index 00000000..850262c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml new file mode 100644 index 00000000..9dd2bf54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml new file mode 100644 index 00000000..b2ecf40d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml new file mode 100644 index 00000000..db259766 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml new file mode 100644 index 00000000..b1e43974 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml new file mode 100644 
index 00000000..0158c545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml new file mode 100644 index 00000000..bdd7ca7f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml new file mode 100644 index 00000000..5f7f0f51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml new file mode 100644 index 00000000..be9b334e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ro_social_sciences_tasks +task: 
global_mmlu_full_ro_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml new file mode 100644 index 00000000..f37228bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml new file mode 100644 index 00000000..aae05dc9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ro_social_sciences_tasks +task: global_mmlu_full_ro_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml new file mode 100644 index 00000000..2d789c20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ro_other_tasks +task: global_mmlu_full_ro_virology diff --git a/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml new file mode 100644 index 00000000..40ff8228 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/global_mmlu_full_ro_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ro_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ro_humanities_tasks +task: global_mmlu_full_ro_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ro/utils.py b/lm_eval/tasks/global_mmlu/full/ro/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ro/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml new file mode 100644 index 00000000..cc63cd34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_ru +task: + - global_mmlu_full_ru_stem + - global_mmlu_full_ru_other + - global_mmlu_full_ru_social_sciences + - global_mmlu_full_ru_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml new file mode 100644 index 00000000..55422b43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_humanities +task: + - global_mmlu_full_ru_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml new file mode 100644 index 00000000..d47ccc60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_other +task: + - global_mmlu_full_ru_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml new file mode 100644 index 00000000..12d48428 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_social_sciences +task: + - global_mmlu_full_ru_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml new file mode 100644 index 00000000..70ae3edb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_ru_stem +task: + - global_mmlu_full_ru_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml new file mode 100644 index 00000000..4b2f491b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/_ru_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: ru +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml new file mode 100644 index 00000000..de158df8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml new file mode 100644 index 00000000..aab717e4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _ru_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml new file mode 100644 index 00000000..3d8d0e32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml new file mode 100644 index 00000000..d2855ca3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml new file mode 100644 index 00000000..2efe0829 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml new file mode 100644 index 00000000..96d00deb --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml new file mode 100644 index 00000000..0a5aac35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml new file mode 100644 index 00000000..bd8bf28b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml new file mode 100644 index 00000000..a2e080c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml new file mode 100644 index 00000000..70e8448e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml new file mode 100644 index 00000000..8e6ecbcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml new file mode 100644 index 00000000..f196351a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml new file mode 100644 index 00000000..e623d78f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml new file mode 100644 index 00000000..df35a1f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml new file mode 100644 index 00000000..82c49f89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml new file mode 100644 index 00000000..6ed11c5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml new file mode 100644 index 00000000..8ebe62bf --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml new file mode 100644 index 00000000..27d6ad70 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml new file mode 100644 index 00000000..7860e73e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml new file mode 100644 index 00000000..7596daa3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml new file mode 100644 index 00000000..ecb64d52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml new file mode 100644 index 00000000..92feccc5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml new file mode 100644 index 00000000..6f586f50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml new file mode 100644 index 00000000..0ffc85df --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml new file mode 100644 index 00000000..5da13204 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml new file mode 100644 index 00000000..fc684975 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml new file mode 100644 index 00000000..84887d18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml new file mode 100644 index 00000000..29ddf5bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml new file mode 100644 index 00000000..a0680bad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml new file mode 100644 index 00000000..07ac341b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml new file mode 100644 index 00000000..18e12bcd --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml new file mode 100644 index 00000000..c37522a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml new file mode 100644 index 00000000..cbd6bf32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml new file mode 100644 index 00000000..8766c348 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml new file mode 100644 index 00000000..4edbb98c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_international_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml new file mode 100644 index 00000000..24cea632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml new file mode 100644 index 00000000..3160fadc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml new file mode 100644 index 00000000..b8e480e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_ru_stem_tasks +task: global_mmlu_full_ru_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml new file mode 100644 index 00000000..4a7b77a1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_management diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml new file mode 100644 index 00000000..c71a4f29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_marketing diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml new file mode 100644 index 00000000..ac34ba20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml new file mode 100644 index 00000000..6049ccb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_ru_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml new file mode 100644 index 00000000..d974ccfa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml new file mode 100644 index 00000000..f05f7de9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml new file mode 100644 index 00000000..59cc8dee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml new file mode 100644 index 00000000..eb78b1f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml new file mode 100644 index 00000000..685bb2a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml new file mode 100644 index 00000000..35c21255 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml new file mode 100644 index 00000000..ce70d006 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml new file mode 100644 
index 00000000..cce88d1d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml new file mode 100644 index 00000000..39fc8953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml new file mode 100644 index 00000000..3dfd71cc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml new file mode 100644 index 00000000..bd08ea34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_ru_social_sciences_tasks +task: 
global_mmlu_full_ru_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml new file mode 100644 index 00000000..ef616ee1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_sociology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml new file mode 100644 index 00000000..c8244e65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_ru_social_sciences_tasks +task: global_mmlu_full_ru_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml new file mode 100644 index 00000000..2f4df810 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_ru_other_tasks +task: global_mmlu_full_ru_virology diff --git a/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml new file mode 100644 index 00000000..06f71986 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/global_mmlu_full_ru_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _ru_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_ru_humanities_tasks +task: global_mmlu_full_ru_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/ru/utils.py b/lm_eval/tasks/global_mmlu/full/ru/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/ru/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml new file mode 100644 index 00000000..4deed570 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_si +task: + - global_mmlu_full_si_stem + - global_mmlu_full_si_other + - global_mmlu_full_si_social_sciences + - global_mmlu_full_si_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml new file mode 100644 index 00000000..b97994d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_humanities +task: + - global_mmlu_full_si_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml new file mode 100644 index 00000000..e7600ca4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_other +task: + - global_mmlu_full_si_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml new file mode 100644 index 00000000..4e2351a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_social_sciences +task: + - global_mmlu_full_si_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml new file mode 100644 index 00000000..8878bf80 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_si_stem +task: + - global_mmlu_full_si_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml new file mode 100644 index 00000000..5c775b20 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/_si_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: si +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml new file mode 100644 index 00000000..b81c5803 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml new file mode 100644 index 00000000..32315245 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _si_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml new file mode 100644 index 00000000..c7ab9539 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml new file mode 100644 index 00000000..8281fc42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml new file mode 100644 index 00000000..2a7f5cf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml new file mode 100644 index 00000000..e54148da --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml new file mode 100644 index 00000000..b797ac60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml new file mode 100644 index 00000000..ba69de35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml new file mode 100644 index 00000000..65ed9424 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml new file mode 100644 index 00000000..1418aa0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml new file mode 100644 index 00000000..cb32cd4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml new file mode 100644 index 00000000..ce5ab9b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml new file mode 100644 index 00000000..c2ab5718 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml new file mode 100644 index 00000000..5e764903 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml new file mode 100644 index 00000000..99679bb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml new file mode 100644 index 00000000..553bc9bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml new file mode 100644 index 00000000..112814b6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml new file mode 100644 index 00000000..008b5537 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml new file mode 100644 index 00000000..fecd995a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml new file mode 100644 index 00000000..3d3018b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml new file mode 100644 index 00000000..e80a1f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml new file mode 100644 index 00000000..10e15738 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml new file mode 100644 index 00000000..12d90b97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml new file mode 100644 index 00000000..d285c2c6 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml new file mode 100644 index 00000000..1c85f2df --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml new file mode 100644 index 00000000..b292fa50 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml new file mode 100644 index 00000000..ada74f5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml new file mode 100644 index 00000000..84bbda28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml new file mode 100644 index 00000000..7c378798 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml new file mode 100644 index 00000000..13758f22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml new file mode 100644 index 00000000..0fe85e14 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml new file mode 100644 index 00000000..8afaa392 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml new file mode 100644 index 00000000..2cf69a68 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml new file mode 100644 index 00000000..418927d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml new file mode 100644 index 00000000..de0a611d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_international_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml new file mode 100644 index 00000000..10212173 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml new file mode 100644 index 00000000..d31372ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml new file mode 100644 index 00000000..0e3d0e7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_si_stem_tasks +task: global_mmlu_full_si_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml new file mode 100644 index 00000000..f4e29c9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_management diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml new file mode 100644 index 00000000..8dff414a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_marketing diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml new file mode 100644 index 00000000..6160f02b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml new file mode 100644 index 00000000..de1db6c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_si_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml new file mode 100644 index 00000000..d48cf75c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml new file mode 100644 index 00000000..5d08b811 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml new file mode 100644 index 00000000..3163db49 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml new file mode 100644 index 00000000..f809bddd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml new file mode 100644 index 00000000..964e6ab7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml new file mode 100644 index 00000000..c04e0bbc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml new file mode 100644 index 00000000..6542f14e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml new file mode 100644 
index 00000000..38448979 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml new file mode 100644 index 00000000..80f36885 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml new file mode 100644 index 00000000..2ac5169e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml new file mode 100644 index 00000000..21423506 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_si_social_sciences_tasks +task: 
global_mmlu_full_si_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml new file mode 100644 index 00000000..c86ee0a3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_sociology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml new file mode 100644 index 00000000..28c238e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_si_social_sciences_tasks +task: global_mmlu_full_si_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml new file mode 100644 index 00000000..a1935460 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_si_other_tasks +task: global_mmlu_full_si_virology diff --git a/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml new file mode 100644 index 00000000..424c23c2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/global_mmlu_full_si_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _si_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_si_humanities_tasks +task: global_mmlu_full_si_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/si/utils.py b/lm_eval/tasks/global_mmlu/full/si/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/si/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml new file mode 100644 index 00000000..98ced987 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sn +task: + - global_mmlu_full_sn_stem + - global_mmlu_full_sn_other + - global_mmlu_full_sn_social_sciences + - global_mmlu_full_sn_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml new file mode 100644 index 00000000..69690862 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_humanities +task: + - global_mmlu_full_sn_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml new file mode 100644 index 00000000..18e750b6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_other +task: + - global_mmlu_full_sn_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml new file mode 100644 index 00000000..a8e76215 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_social_sciences +task: + - global_mmlu_full_sn_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml new file mode 100644 index 00000000..b3136233 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sn_stem +task: + - global_mmlu_full_sn_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml new file mode 100644 index 00000000..30d50ba0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/_sn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml new file mode 100644 index 00000000..c4de495e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml new file mode 100644 index 00000000..1ef227aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sn_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml new file mode 100644 index 00000000..8662ab96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml new file mode 100644 index 00000000..6f4741c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml new file mode 100644 index 00000000..7477170e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml new file mode 100644 index 00000000..6d0ec277 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml new file mode 100644 index 00000000..9f0c4f42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml new file mode 100644 index 00000000..c8651ee1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml new file mode 100644 index 00000000..c1d1a98e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml new file mode 100644 index 00000000..d9ce08f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml new file mode 100644 index 00000000..ae34a82a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml new file mode 100644 index 00000000..4b41c175 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml new file mode 100644 index 00000000..5aaa8a78 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml new file mode 100644 index 00000000..8606e96c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml new file mode 100644 index 00000000..9c57f703 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml new file mode 100644 index 00000000..0ed5b400 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml new file mode 100644 index 00000000..55dafc2b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml new file mode 100644 index 00000000..5b8ee96f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml new file mode 100644 index 00000000..2597a7d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml new file mode 100644 index 00000000..1e6be4e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml new file mode 100644 index 00000000..446da912 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml new file mode 100644 index 00000000..dd8cf61c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml new file mode 100644 index 00000000..2e178adf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml new file mode 100644 index 00000000..1ac4efda --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml new file mode 100644 index 00000000..23ca0b41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml new file mode 100644 index 00000000..0bd9be19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml new file mode 100644 index 00000000..916e14ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml new file mode 100644 index 00000000..b6a3e60c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml new file mode 100644 index 00000000..62a197c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml new file mode 100644 index 00000000..815cb60b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml new file mode 100644 index 00000000..ff9f970e --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml new file mode 100644 index 00000000..b2dedc38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml new file mode 100644 index 00000000..0ef13930 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml new file mode 100644 index 00000000..a52c2ded --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml new file mode 100644 index 00000000..648c3dea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml new file mode 100644 index 00000000..ca63c411 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml new file mode 100644 index 00000000..d74a7f18 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml new file mode 100644 index 00000000..db272b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sn_stem_tasks +task: global_mmlu_full_sn_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml new file mode 100644 index 00000000..db3bee4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_management diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml new file mode 100644 index 00000000..a700c4e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml new file mode 100644 index 00000000..b826b187 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml new file mode 100644 index 00000000..dea895aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sn_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml new file mode 100644 index 00000000..b641f6b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml new file mode 100644 index 00000000..2951a953 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml new file mode 100644 index 00000000..9816d8b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml new file mode 100644 index 00000000..4ea10505 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml new file mode 100644 index 00000000..e941437b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml new file mode 100644 index 00000000..057a197d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml new file mode 100644 index 00000000..72c9fac7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml new file mode 100644 
index 00000000..e727b3cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml new file mode 100644 index 00000000..341322d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml new file mode 100644 index 00000000..5448baa4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml new file mode 100644 index 00000000..542c709a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sn_social_sciences_tasks +task: 
global_mmlu_full_sn_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml new file mode 100644 index 00000000..f2913db5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml new file mode 100644 index 00000000..ad476847 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sn_social_sciences_tasks +task: global_mmlu_full_sn_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml new file mode 100644 index 00000000..254fedb4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sn_other_tasks +task: global_mmlu_full_sn_virology diff --git a/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml new file mode 100644 index 00000000..2aef6dfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/global_mmlu_full_sn_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sn_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sn_humanities_tasks +task: global_mmlu_full_sn_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sn/utils.py b/lm_eval/tasks/global_mmlu/full/sn/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sn/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml new file mode 100644 index 00000000..014a4121 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_so +task: + - global_mmlu_full_so_stem + - global_mmlu_full_so_other + - global_mmlu_full_so_social_sciences + - global_mmlu_full_so_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml new file mode 100644 index 00000000..ff78bfab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_humanities +task: + - global_mmlu_full_so_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml new file mode 100644 index 00000000..eec8e661 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_other +task: + - global_mmlu_full_so_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml new file mode 100644 index 00000000..9d00ea1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_social_sciences +task: + - global_mmlu_full_so_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml new file mode 100644 index 00000000..497b9b01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_so_stem +task: + - global_mmlu_full_so_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml new file mode 100644 index 00000000..fb052a63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/_so_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: so +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml new file mode 100644 index 00000000..afb5d908 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml new file mode 100644 index 00000000..79f3446d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _so_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml new file mode 100644 index 00000000..54a2faa0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml new file mode 100644 index 00000000..65bc598c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml new file mode 100644 index 00000000..224aa39b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml new file mode 100644 index 00000000..758d22c3 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml new file mode 100644 index 00000000..35c22430 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml new file mode 100644 index 00000000..86428ae8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml new file mode 100644 index 00000000..f9957a23 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml new file mode 100644 index 00000000..f51a1b12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml new file mode 100644 index 00000000..43388d6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml new file mode 100644 index 00000000..8a556330 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml new file mode 100644 index 00000000..97dfa147 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml new file mode 100644 index 00000000..9792659f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml new file mode 100644 index 00000000..3ed44e41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml new file mode 100644 index 00000000..76628481 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml new file mode 100644 index 00000000..4b7645c8 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml new file mode 100644 index 00000000..fa75e666 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml new file mode 100644 index 00000000..d3ad29d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml new file mode 100644 index 00000000..274af23b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml new file mode 100644 index 00000000..6bce30d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml new file mode 100644 index 00000000..cfc44f08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml new file mode 100644 index 00000000..55479c39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml new file mode 100644 index 00000000..ceb5a701 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml new file mode 100644 index 00000000..0c403ec5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml new file mode 100644 index 00000000..e8089bdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml new file mode 100644 index 00000000..32cacffe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml new file mode 100644 index 00000000..fd2c35ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml new file mode 100644 index 00000000..26f2cb3c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml new file mode 100644 index 00000000..730075b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml new file mode 100644 index 00000000..c9702a66 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml new file mode 100644 index 00000000..78a21d5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml new file mode 100644 index 00000000..c95b5562 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml new file mode 100644 index 00000000..632778d3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml new file mode 100644 index 00000000..2d5ab1c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_international_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml new file mode 100644 index 00000000..1372a1d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml new file mode 100644 index 00000000..19a1120e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml new file mode 100644 index 00000000..c1e13dda --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_so_stem_tasks +task: global_mmlu_full_so_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml new file mode 100644 index 00000000..6e325205 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_management diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml new file mode 100644 index 00000000..8b1c002f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_marketing diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml new file mode 100644 index 00000000..c0136dc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml new file mode 100644 index 00000000..2b8a33ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_so_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml new file mode 100644 index 00000000..c1bd0011 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml new file mode 100644 index 00000000..60418a65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml new file mode 100644 index 00000000..5aa40241 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml new file mode 100644 index 00000000..421a9801 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml new file mode 100644 index 00000000..721bfbf2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml new file mode 100644 index 00000000..4ca0c5c9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml new file mode 100644 index 00000000..7f57b594 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml new file mode 100644 
index 00000000..a7d6408e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml new file mode 100644 index 00000000..a03de5bb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml new file mode 100644 index 00000000..f7af81e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml new file mode 100644 index 00000000..b52ee259 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_so_social_sciences_tasks +task: 
global_mmlu_full_so_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml new file mode 100644 index 00000000..7f3847e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_sociology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml new file mode 100644 index 00000000..a6017167 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_so_social_sciences_tasks +task: global_mmlu_full_so_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml new file mode 100644 index 00000000..2dc85b32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_so_other_tasks +task: global_mmlu_full_so_virology diff --git a/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml new file mode 100644 index 00000000..9ca99e5b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/global_mmlu_full_so_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _so_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_so_humanities_tasks +task: global_mmlu_full_so_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/so/utils.py b/lm_eval/tasks/global_mmlu/full/so/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/so/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml new file mode 100644 index 00000000..e322d980 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sr +task: + - global_mmlu_full_sr_stem + - global_mmlu_full_sr_other + - global_mmlu_full_sr_social_sciences + - global_mmlu_full_sr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml new file mode 100644 index 00000000..080bc545 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_humanities +task: + - global_mmlu_full_sr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml new file mode 100644 index 00000000..9f0735eb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_other +task: + - global_mmlu_full_sr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml new file mode 100644 index 00000000..bdc29d1f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_social_sciences +task: + - global_mmlu_full_sr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml new file mode 100644 index 00000000..7c4aa636 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sr_stem +task: + - global_mmlu_full_sr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml new file mode 100644 index 00000000..6af61b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/_sr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml new file mode 100644 index 00000000..b3275870 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml new file mode 100644 index 00000000..5689af73 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml new file mode 100644 index 00000000..3d23a438 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml new file mode 100644 index 00000000..e89f5e61 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml new file mode 100644 index 00000000..b5611c15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml new file mode 100644 index 00000000..9e28c303 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml new file mode 100644 index 00000000..1eac952c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml new file mode 100644 index 00000000..e1146aa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml new file mode 100644 index 00000000..bcfda2ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml new file mode 100644 index 00000000..3beb5b26 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml new file mode 100644 index 00000000..f959a02f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml new file mode 100644 index 00000000..7e8761e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml new file mode 100644 index 00000000..9325f6de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml new file mode 100644 index 00000000..cc4a5bcc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml new file mode 100644 index 00000000..d3a5a78b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml new file mode 100644 index 00000000..50f60166 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml new file mode 100644 index 00000000..8bdd854f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml new file mode 100644 index 00000000..88862d21 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml new file mode 100644 index 00000000..8f2b2952 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml new file mode 100644 index 00000000..6b89deb1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml new file mode 100644 index 00000000..55fd7e8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml new file mode 100644 index 00000000..946acf0e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml new file mode 100644 index 00000000..07058971 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..a9721c9b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..fedea95a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml new file mode 100644 index 00000000..dca9e140 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml new file mode 100644 index 00000000..b01276f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml new file mode 100644 index 00000000..f549f8ac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml new file mode 100644 index 00000000..c6b31eee --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml new file mode 100644 index 00000000..12d0f0e5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml new file mode 100644 index 00000000..98c40100 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml new file mode 100644 index 00000000..76e6b45c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml new file mode 100644 index 00000000..b0ff1d95 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml new file mode 100644 index 00000000..73a30099 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml new file mode 100644 index 00000000..0aea0826 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml new file mode 100644 index 00000000..debe604f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml new file mode 100644 index 00000000..407417f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml new file mode 100644 index 00000000..513a7f87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sr_stem_tasks +task: global_mmlu_full_sr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml new file mode 100644 index 00000000..fca9de04 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_management diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml new file mode 100644 index 00000000..8267563e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml new file mode 100644 index 00000000..4ba860f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml new file mode 100644 index 00000000..ecdbcea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml new file mode 100644 index 00000000..54bf3491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml new file mode 100644 index 00000000..2eab8d4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml new file mode 100644 index 00000000..83e1b84c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml new file mode 100644 index 00000000..654ee86b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml new file mode 100644 index 00000000..3a2f944b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml new file mode 100644 index 00000000..648ae0cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml new file mode 100644 index 00000000..0ee8a831 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml new file mode 100644 
index 00000000..3b142115 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml new file mode 100644 index 00000000..19e2dc54 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml new file mode 100644 index 00000000..043024c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml new file mode 100644 index 00000000..24720925 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sr_social_sciences_tasks +task: 
global_mmlu_full_sr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml new file mode 100644 index 00000000..fc93c5e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml new file mode 100644 index 00000000..1b338dd6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sr_social_sciences_tasks +task: global_mmlu_full_sr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml new file mode 100644 index 00000000..b07588ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sr_other_tasks +task: global_mmlu_full_sr_virology diff --git a/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml new file mode 100644 index 00000000..3f78403e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/global_mmlu_full_sr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sr_humanities_tasks +task: global_mmlu_full_sr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sr/utils.py b/lm_eval/tasks/global_mmlu/full/sr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml new file mode 100644 index 00000000..a9b0dc1b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sv +task: + - global_mmlu_full_sv_stem + - global_mmlu_full_sv_other + - global_mmlu_full_sv_social_sciences + - global_mmlu_full_sv_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml new file mode 100644 index 00000000..f8b4628f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_humanities +task: + - global_mmlu_full_sv_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml new file mode 100644 index 00000000..1b29ca13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_other +task: + - global_mmlu_full_sv_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml new file mode 100644 index 00000000..7c4a813e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_social_sciences +task: + - global_mmlu_full_sv_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml new file mode 100644 index 00000000..a6fd88f1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sv_stem +task: + - global_mmlu_full_sv_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml new file mode 100644 index 00000000..1b9fdea9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/_sv_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sv +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml new file mode 100644 index 00000000..8329302f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml new file mode 100644 index 00000000..ac9fa560 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sv_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml new file mode 100644 index 00000000..096e0e8f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml new file mode 100644 index 00000000..ced0b051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml new file mode 100644 index 00000000..a88871b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml new file mode 100644 index 00000000..c2462c17 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml new file mode 100644 index 00000000..3ae3fecd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml new file mode 100644 index 00000000..a3f00b24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml new file mode 100644 index 00000000..71f613d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml new file mode 100644 index 00000000..46f4c6ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml new file mode 100644 index 00000000..06906bfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml new file mode 100644 index 00000000..1013ef30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml new file mode 100644 index 00000000..a6a752f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml new file mode 100644 index 00000000..547365f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml new file mode 100644 index 00000000..74086a15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml new file mode 100644 index 00000000..8d1f4847 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml new file mode 100644 index 00000000..b78b5846 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml new file mode 100644 index 00000000..dd205629 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml new file mode 100644 index 00000000..fc6ebf2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml new file mode 100644 index 00000000..03773a83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml new file mode 100644 index 00000000..e3db653a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml new file mode 100644 index 00000000..4a087557 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml new file mode 100644 index 00000000..63855384 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml new file mode 100644 index 00000000..7e62f26f --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml new file mode 100644 index 00000000..b686a26e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml new file mode 100644 index 00000000..17716538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml new file mode 100644 index 00000000..e9817c17 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml new file mode 100644 index 00000000..61359149 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml new file mode 100644 index 00000000..ce3aa9e2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml new file mode 100644 index 00000000..6f705f8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml new file mode 100644 index 00000000..765cdf60 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml new file mode 100644 index 00000000..de7b30b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml new file mode 100644 index 00000000..20969051 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml new file mode 100644 index 00000000..a8bd5fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml new file mode 100644 index 00000000..7e5ddb57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml new file mode 100644 index 00000000..ff161d5f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml new file mode 100644 index 00000000..f1602c90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml new file mode 100644 index 00000000..6f011063 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sv_stem_tasks +task: global_mmlu_full_sv_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml new file mode 100644 index 00000000..7ff7b873 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_management diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml new file mode 100644 index 00000000..c0e669f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml new file mode 100644 index 00000000..83e52445 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml new file mode 100644 index 00000000..f1798792 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sv_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml new file mode 100644 index 00000000..1f03ac09 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml new file mode 100644 index 00000000..fe7f58d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml new file mode 100644 index 00000000..79207a87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml new file mode 100644 index 00000000..ae533079 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml new file mode 100644 index 00000000..1c602c4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml new file mode 100644 index 00000000..ebdef8a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml new file mode 100644 index 00000000..3645c38a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml new file mode 100644 
index 00000000..d40f577d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml new file mode 100644 index 00000000..edf83106 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml new file mode 100644 index 00000000..f897662c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml new file mode 100644 index 00000000..9ad4fb5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sv_social_sciences_tasks +task: 
global_mmlu_full_sv_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml new file mode 100644 index 00000000..4b869606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml new file mode 100644 index 00000000..522778de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sv_social_sciences_tasks +task: global_mmlu_full_sv_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml new file mode 100644 index 00000000..8b3cbc8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sv_other_tasks +task: global_mmlu_full_sv_virology diff --git a/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml new file mode 100644 index 00000000..1d7df52b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/global_mmlu_full_sv_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sv_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sv_humanities_tasks +task: global_mmlu_full_sv_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sv/utils.py b/lm_eval/tasks/global_mmlu/full/sv/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sv/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml new file mode 100644 index 00000000..274543cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_sw +task: + - global_mmlu_full_sw_stem + - global_mmlu_full_sw_other + - global_mmlu_full_sw_social_sciences + - global_mmlu_full_sw_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml new file mode 100644 index 00000000..02168dff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_humanities +task: + - global_mmlu_full_sw_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml new file mode 100644 index 00000000..9fa28a16 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_other +task: + - global_mmlu_full_sw_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml new file mode 100644 index 00000000..ad318442 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_social_sciences +task: + - global_mmlu_full_sw_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml new file mode 100644 index 00000000..6f23cae8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_sw_stem +task: + - global_mmlu_full_sw_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml new file mode 100644 index 00000000..58cf5322 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml new file mode 100644 index 00000000..187229fb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml new file mode 100644 index 00000000..3d0d4c5c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _sw_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml new file mode 100644 index 00000000..0639b390 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml new file mode 100644 index 00000000..a729c9da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml new file mode 100644 index 00000000..c6b83623 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml new file mode 100644 index 00000000..1856b934 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml new file mode 100644 index 00000000..5ad547ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml new file mode 100644 index 00000000..ff8d8741 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml new file mode 100644 index 00000000..02f53a4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml new file mode 100644 index 00000000..b9f4cc6c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml new file mode 100644 index 00000000..bcca5b3f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml new file mode 100644 index 00000000..434d2faa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml new file mode 100644 index 00000000..2c1c9d41 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml new file mode 100644 index 00000000..2a907de6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml new file mode 100644 index 00000000..1ae86a7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml new file mode 100644 index 00000000..05871f25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml new file mode 100644 index 00000000..8d0de407 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml new file mode 100644 index 00000000..29bec055 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml new file mode 100644 index 00000000..2e49866a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml new file mode 100644 index 00000000..a7adbd97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml new file mode 100644 index 00000000..2e65ab5a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml new file mode 100644 index 00000000..7352ad72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml new file mode 100644 index 00000000..797932ba --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml new file mode 100644 index 00000000..602d71ff --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml new file mode 100644 index 00000000..a91dd829 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml new file mode 100644 index 00000000..c19b28da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml new file mode 100644 index 00000000..7a9c63bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml new file mode 100644 index 00000000..239eac65 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml new file mode 100644 index 00000000..b4f19d84 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml new file mode 100644 index 00000000..5725af63 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml new file mode 100644 index 00000000..1d080340 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml new file mode 100644 index 00000000..cfe5a9e7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml new file mode 100644 index 00000000..ba20e932 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml new file mode 100644 index 00000000..4609bea0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml new file mode 100644 index 00000000..bbf616b1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_international_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml new file mode 100644 index 00000000..6781f2d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml new file mode 100644 index 00000000..1f862917 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml new file mode 100644 index 00000000..9eb51cfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_sw_stem_tasks +task: global_mmlu_full_sw_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml new file mode 100644 index 00000000..5b0e9e67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_management diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml new file mode 100644 index 00000000..fb65e87e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_marketing diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml new file mode 100644 index 00000000..10d4db0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml new file mode 100644 index 00000000..b337d0ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_sw_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml new file mode 100644 index 00000000..f44bfa0d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml new file mode 100644 index 00000000..eabd5a91 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml new file mode 100644 index 00000000..41c64458 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml new file mode 100644 index 00000000..96edac99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml new file mode 100644 index 00000000..db94a2ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml new file mode 100644 index 00000000..7cd19d35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml new file mode 100644 index 00000000..9434ae4c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml new file mode 100644 
index 00000000..cf35b9c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml new file mode 100644 index 00000000..7570e288 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml new file mode 100644 index 00000000..54c094db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml new file mode 100644 index 00000000..c8d5a42c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_sw_social_sciences_tasks +task: 
global_mmlu_full_sw_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml new file mode 100644 index 00000000..79d51a58 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_sociology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml new file mode 100644 index 00000000..523b1572 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_sw_social_sciences_tasks +task: global_mmlu_full_sw_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml new file mode 100644 index 00000000..43179ff8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_sw_other_tasks +task: global_mmlu_full_sw_virology diff --git a/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml new file mode 100644 index 00000000..bef7b7f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/global_mmlu_full_sw_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_sw_humanities_tasks +task: global_mmlu_full_sw_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/sw/utils.py b/lm_eval/tasks/global_mmlu/full/sw/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/sw/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml new file mode 100644 index 00000000..5ef0f7ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_te +task: + - global_mmlu_full_te_stem + - global_mmlu_full_te_other + - global_mmlu_full_te_social_sciences + - global_mmlu_full_te_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml new file mode 100644 index 00000000..7a3c479e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_humanities +task: + - global_mmlu_full_te_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml new file mode 100644 index 00000000..2932844a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_other +task: + - global_mmlu_full_te_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml new file mode 100644 index 00000000..25e721db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_social_sciences +task: + - global_mmlu_full_te_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml new file mode 100644 index 00000000..fe2426ca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_te_stem +task: + - global_mmlu_full_te_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml new file mode 100644 index 00000000..d7b1190d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/_te_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: te +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml new file mode 100644 index 00000000..e922fd08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml new file mode 100644 index 00000000..00582018 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _te_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml new file mode 100644 index 00000000..5bc5e76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml new file mode 100644 index 00000000..7b440102 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml new file mode 100644 index 00000000..90e56184 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml new file mode 100644 index 00000000..0f036e60 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml new file mode 100644 index 00000000..ccdb849a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml new file mode 100644 index 00000000..f11e5657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml new file mode 100644 index 00000000..c5022ce2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml new file mode 100644 index 00000000..bd5219f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml new file mode 100644 index 00000000..88dad05a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml new file mode 100644 index 00000000..0e8f37fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml new file mode 100644 index 00000000..f0527625 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml new file mode 100644 index 00000000..cf008a67 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml new file mode 100644 index 00000000..97169e93 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml new file mode 100644 index 00000000..f3edc896 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml new file mode 100644 index 00000000..d4c182d1 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml new file mode 100644 index 00000000..53b52f4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml new file mode 100644 index 00000000..5f02170f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml new file mode 100644 index 00000000..c77d30aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml new file mode 100644 index 00000000..7f388a06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml new file mode 100644 index 00000000..75d54d72 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml new file mode 100644 index 00000000..383596ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml new file mode 100644 index 00000000..8db56a85 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml new file mode 100644 index 00000000..bd471b8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml new file mode 100644 index 00000000..58f577ed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml new file mode 100644 index 00000000..400a3805 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml new file mode 100644 index 00000000..694ddc30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml new file mode 100644 index 00000000..b900af19 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml new file mode 100644 index 00000000..3492e724 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml new file mode 100644 index 00000000..48a2d75a --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml new file mode 100644 index 00000000..7e95f7ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml new file mode 100644 index 00000000..dc44c1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml new file mode 100644 index 00000000..d7631419 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml new file mode 100644 index 00000000..0c2c7862 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_international_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml new file mode 100644 index 00000000..718cd9fa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml new file mode 100644 index 00000000..7bb9170c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml new file mode 100644 index 00000000..12355538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_te_stem_tasks +task: global_mmlu_full_te_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml new file mode 100644 index 00000000..f092416f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_management diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml new file mode 100644 index 00000000..15b84b46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_marketing diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml new file mode 100644 index 00000000..8f0730be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml new file mode 100644 index 00000000..53487f55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_te_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml new file mode 100644 index 00000000..fca8df9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml new file mode 100644 index 00000000..d87f6b02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml new file mode 100644 index 00000000..9348a76e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml new file mode 100644 index 00000000..c8efe8d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml new file mode 100644 index 00000000..b702542e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml new file mode 100644 index 00000000..045b6e1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml new file mode 100644 index 00000000..5e5fa308 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml new file mode 100644 
index 00000000..d4ede33f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml new file mode 100644 index 00000000..cb1906d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml new file mode 100644 index 00000000..1ac09ce0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml new file mode 100644 index 00000000..bbb7bc7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_te_social_sciences_tasks +task: 
global_mmlu_full_te_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml new file mode 100644 index 00000000..e080e082 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_sociology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml new file mode 100644 index 00000000..338f0809 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_te_social_sciences_tasks +task: global_mmlu_full_te_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml new file mode 100644 index 00000000..1f5e38a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_te_other_tasks +task: global_mmlu_full_te_virology diff --git a/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml new file mode 100644 index 00000000..4da26e3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/global_mmlu_full_te_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _te_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_te_humanities_tasks +task: global_mmlu_full_te_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/te/utils.py b/lm_eval/tasks/global_mmlu/full/te/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/te/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml new file mode 100644 index 00000000..8cd3d3f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_tr +task: + - global_mmlu_full_tr_stem + - global_mmlu_full_tr_other + - global_mmlu_full_tr_social_sciences + - global_mmlu_full_tr_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml new file mode 100644 index 00000000..f4dade15 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_humanities +task: + - global_mmlu_full_tr_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml new file mode 100644 index 00000000..e80a5b9d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_other +task: + - global_mmlu_full_tr_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml new file mode 100644 index 00000000..56fc20e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_social_sciences +task: + - global_mmlu_full_tr_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml new file mode 100644 index 00000000..51f9bb3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_tr_stem +task: + - global_mmlu_full_tr_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml new file mode 100644 index 00000000..e322bee6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/_tr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: tr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml new file mode 100644 index 00000000..1e821573 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml new file mode 100644 index 00000000..44440225 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _tr_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml new file mode 100644 index 00000000..e85390bf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml new file mode 100644 index 00000000..4b1afc9c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml new file mode 100644 index 00000000..bdfa69e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml new file mode 100644 index 00000000..df43a67c --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml new file mode 100644 index 00000000..af2b8b3e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml new file mode 100644 index 00000000..622854f4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml new file mode 100644 index 00000000..902bd9c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml new file mode 100644 index 00000000..6b44d0d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml new file mode 100644 index 00000000..27540d97 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml new file mode 100644 index 00000000..dbcabeed --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml new file mode 100644 index 00000000..628a4fcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml new file mode 100644 index 00000000..6feb236f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml new file mode 100644 index 00000000..9a2a8665 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml new file mode 100644 index 00000000..ffc6dee7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml new file mode 100644 index 00000000..77c189a0 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml new file mode 100644 index 00000000..a756d102 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml new file mode 100644 index 00000000..51e7dd9e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml new file mode 100644 index 00000000..077476ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml new file mode 100644 index 00000000..cb60e042 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml new file mode 100644 index 00000000..2b989e05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml new file mode 100644 index 00000000..8a0c4d90 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2a585f02 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml new file mode 100644 index 00000000..f88e9831 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml new file mode 100644 index 00000000..e880b0b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml new file mode 100644 index 00000000..5527bed2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml new file mode 100644 index 00000000..da93a96e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml new file mode 100644 index 00000000..a28e110c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml new file mode 100644 index 00000000..93871dcf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml new file mode 100644 index 00000000..507a4d5c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml new file mode 100644 index 00000000..60cc713e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml new file mode 100644 index 00000000..8e48bf12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml new file mode 100644 index 00000000..84a95850 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml new file mode 100644 index 00000000..d0dc429f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_international_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml new file mode 100644 index 00000000..ea3b7a51 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml new file mode 100644 index 00000000..cd61d7d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml new file mode 100644 index 00000000..b0e785c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_tr_stem_tasks +task: global_mmlu_full_tr_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml new file mode 100644 index 00000000..5ce0d753 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_management diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml new file mode 100644 index 00000000..8ffd4986 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_marketing diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml new file mode 100644 index 00000000..43814b40 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml new file mode 100644 index 00000000..e21cfcf6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_tr_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml new file mode 100644 index 00000000..88fbfbe2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml new file mode 100644 index 00000000..9f92f855 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml new file mode 100644 index 00000000..31b39c38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml new file mode 100644 index 00000000..283a2b89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml new file mode 100644 index 00000000..e4c17014 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml new file mode 100644 index 00000000..c69f14f7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml new file mode 100644 index 00000000..8f5e97c6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml new file mode 100644 
index 00000000..00a5f32a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml new file mode 100644 index 00000000..c8571bdb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml new file mode 100644 index 00000000..539f8da6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml new file mode 100644 index 00000000..4203e365 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_tr_social_sciences_tasks +task: 
global_mmlu_full_tr_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml new file mode 100644 index 00000000..9cf6352c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_sociology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml new file mode 100644 index 00000000..b86a699b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_tr_social_sciences_tasks +task: global_mmlu_full_tr_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml new file mode 100644 index 00000000..001cbb28 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_tr_other_tasks +task: global_mmlu_full_tr_virology diff --git a/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml new file mode 100644 index 00000000..1f1d4e4f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/global_mmlu_full_tr_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _tr_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_tr_humanities_tasks +task: global_mmlu_full_tr_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/tr/utils.py b/lm_eval/tasks/global_mmlu/full/tr/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/tr/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml new file mode 100644 index 00000000..e880be32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_uk +task: + - global_mmlu_full_uk_stem + - global_mmlu_full_uk_other + - global_mmlu_full_uk_social_sciences + - global_mmlu_full_uk_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml new file mode 100644 index 00000000..b3ec01db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_humanities +task: + - global_mmlu_full_uk_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml new file mode 100644 index 00000000..176b1861 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_other +task: + - global_mmlu_full_uk_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml new file mode 100644 index 00000000..66b36a60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_social_sciences +task: + - global_mmlu_full_uk_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml new file mode 100644 index 00000000..4deba657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_uk_stem +task: + - global_mmlu_full_uk_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml new file mode 100644 index 00000000..5765ce13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/_uk_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: uk +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml new file mode 100644 index 00000000..ce37c715 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml new file mode 100644 index 00000000..db1433d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _uk_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml new file mode 100644 index 00000000..6b123ece --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml new file mode 100644 index 00000000..775d2f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml new file mode 100644 index 00000000..5f71076d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml new file mode 100644 index 00000000..92342ac7 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml new file mode 100644 index 00000000..71384a8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml new file mode 100644 index 00000000..6013afe1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml new file mode 100644 index 00000000..27b60491 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml new file mode 100644 index 00000000..87131c25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml new file mode 100644 index 00000000..93109632 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml new file mode 100644 index 00000000..0f11fcce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml new file mode 100644 index 00000000..7ff9715a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml new file mode 100644 index 00000000..ba92e4b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml new file mode 100644 index 00000000..3a1c86ff --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml new file mode 100644 index 00000000..7d80cce7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml new file mode 100644 index 00000000..9f8a4091 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml new file mode 100644 index 00000000..ebd6c2da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml new file mode 100644 index 00000000..a8b0cf3a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml new file mode 100644 index 00000000..010dbec3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml new file mode 100644 index 00000000..9a270144 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml new file mode 100644 index 00000000..52e80017 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml new file mode 100644 index 00000000..4f41dd3d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml new file mode 100644 index 00000000..72c589ef --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml new file mode 100644 index 00000000..e70675d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml new file mode 100644 index 00000000..e29c558e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml new file mode 100644 index 00000000..6b735495 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml new file mode 100644 index 00000000..69a03c06 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml new file mode 100644 index 00000000..9b02711c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml new file mode 100644 index 00000000..60cc0cdd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml new file mode 100644 index 00000000..b62244eb --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml new file mode 100644 index 00000000..57667edc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml new file mode 100644 index 00000000..02804890 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml new file mode 100644 index 00000000..37382bab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml new file mode 100644 index 00000000..d1b046d7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_international_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml new file mode 100644 index 00000000..12b9da52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml new file mode 100644 index 00000000..abb2de2a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml new file mode 100644 index 00000000..7a1a6f34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_uk_stem_tasks +task: global_mmlu_full_uk_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml new file mode 100644 index 00000000..ec4cb17d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_management diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml new file mode 100644 index 00000000..afbdaee2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_marketing diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml new file mode 100644 index 00000000..bc1fe1bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml new file mode 100644 index 00000000..8f3b18f8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_uk_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml new file mode 100644 index 00000000..34b54e34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml new file mode 100644 index 00000000..38706977 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml new file mode 100644 index 00000000..9f9dd1fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml new file mode 100644 index 00000000..4e981008 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml new file mode 100644 index 00000000..08e3c2af --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml new file mode 100644 index 00000000..dc02a7b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml new file mode 100644 index 00000000..7090a6e1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml new file mode 100644 
index 00000000..0b43dcfb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml new file mode 100644 index 00000000..b279a94c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml new file mode 100644 index 00000000..3b45dc62 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml new file mode 100644 index 00000000..4ea308da --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_uk_social_sciences_tasks +task: 
global_mmlu_full_uk_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml new file mode 100644 index 00000000..a7aa08ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_sociology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml new file mode 100644 index 00000000..d089e778 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_uk_social_sciences_tasks +task: global_mmlu_full_uk_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml new file mode 100644 index 00000000..41b627f5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_uk_other_tasks +task: global_mmlu_full_uk_virology diff --git a/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml new file mode 100644 index 00000000..f5d6d415 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/global_mmlu_full_uk_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _uk_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_uk_humanities_tasks +task: global_mmlu_full_uk_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/uk/utils.py b/lm_eval/tasks/global_mmlu/full/uk/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/uk/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml new file mode 100644 index 00000000..d6413b35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_vi +task: + - global_mmlu_full_vi_stem + - global_mmlu_full_vi_other + - global_mmlu_full_vi_social_sciences + - global_mmlu_full_vi_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml new file mode 100644 index 00000000..7a05acca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_humanities +task: + - global_mmlu_full_vi_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml new file mode 100644 index 00000000..880bab9a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_other +task: + - global_mmlu_full_vi_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml new file mode 100644 index 00000000..6da224f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_social_sciences +task: + - global_mmlu_full_vi_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml new file mode 100644 index 00000000..12526ce7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_vi_stem +task: + - global_mmlu_full_vi_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml new file mode 100644 index 00000000..5a0ca817 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/_vi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: vi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml new file mode 100644 index 00000000..47dc80ce --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml new file mode 100644 index 00000000..d29cb583 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _vi_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml new file mode 100644 index 00000000..3e3ba1dc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml new file mode 100644 index 00000000..3afecdc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml new file mode 100644 index 00000000..34a90a8e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml new file mode 100644 index 00000000..63a4c772 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml new file mode 100644 index 00000000..f7226e02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml new file mode 100644 index 00000000..90a9e0b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml new file mode 100644 index 00000000..a09173d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml new file mode 100644 index 00000000..22dc78bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml new file mode 100644 index 00000000..a6f8dbca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml new file mode 100644 index 00000000..4d4b3d60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml new file mode 100644 index 00000000..6c501d0a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml new file mode 100644 index 00000000..d0936b3b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml new file mode 100644 index 00000000..3b23387f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml new file mode 100644 index 00000000..9c098266 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml new file mode 100644 index 00000000..21a28bb4 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml new file mode 100644 index 00000000..a912dba1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml new file mode 100644 index 00000000..e334fb1c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml new file mode 100644 index 00000000..ba98297e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml new file mode 100644 index 00000000..22e0b00a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml new file mode 100644 index 00000000..06507b7c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml new file mode 100644 index 00000000..d6eeec7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml new file mode 100644 index 00000000..2faf2b09 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml new file mode 100644 index 00000000..16ed50b8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml new file mode 100644 index 00000000..1cad75ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml new file mode 100644 index 00000000..4499711f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml new file mode 100644 index 00000000..bb92f446 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml new file mode 100644 index 00000000..0a12e4de --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml new file mode 100644 index 00000000..3ae34e4d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml new file mode 100644 index 00000000..9ad96b12 --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml new file mode 100644 index 00000000..5df3661c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml new file mode 100644 index 00000000..57820fab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml new file mode 100644 index 00000000..5b53962b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml new file mode 100644 index 00000000..5f81b09e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_international_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml new file mode 100644 index 00000000..52ec47d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml new file mode 100644 index 00000000..ed89994d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml new file mode 100644 index 00000000..258bd8c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_vi_stem_tasks +task: global_mmlu_full_vi_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml new file mode 100644 index 00000000..1bd2f606 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_management diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml new file mode 100644 index 00000000..951a3642 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_marketing diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml new file mode 100644 index 00000000..9d606007 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml new file mode 100644 index 00000000..a0cae1b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_vi_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml new file mode 100644 index 00000000..07987487 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml new file mode 100644 index 00000000..6a852bc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml new file mode 100644 index 00000000..42b198f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml new file mode 100644 index 00000000..a7ffc316 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml new file mode 100644 index 00000000..96349674 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml new file mode 100644 index 00000000..da949e34 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml new file mode 100644 index 00000000..81c74535 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml new file mode 100644 
index 00000000..7315b353 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml new file mode 100644 index 00000000..f2eb1652 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml new file mode 100644 index 00000000..12933f08 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml new file mode 100644 index 00000000..7e90ba55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_vi_social_sciences_tasks +task: 
global_mmlu_full_vi_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml new file mode 100644 index 00000000..056c757b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_sociology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml new file mode 100644 index 00000000..5bcd95d6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_vi_social_sciences_tasks +task: global_mmlu_full_vi_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml new file mode 100644 index 00000000..775b0cca --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_vi_other_tasks +task: global_mmlu_full_vi_virology diff --git a/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml new file mode 100644 index 00000000..db6ba6e0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/global_mmlu_full_vi_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _vi_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_vi_humanities_tasks +task: global_mmlu_full_vi_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/vi/utils.py b/lm_eval/tasks/global_mmlu/full/vi/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/vi/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml new file mode 100644 index 00000000..ba9f2460 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_yo +task: + - global_mmlu_full_yo_stem + - global_mmlu_full_yo_other + - global_mmlu_full_yo_social_sciences + - global_mmlu_full_yo_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml new file mode 100644 index 00000000..4e3b3c11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_humanities +task: + - global_mmlu_full_yo_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml new file mode 100644 index 00000000..ed81bdfe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_other +task: + - global_mmlu_full_yo_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml new file mode 100644 index 00000000..bab52fa2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_social_sciences +task: + - global_mmlu_full_yo_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml new file mode 100644 index 00000000..3687d569 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_yo_stem +task: + - global_mmlu_full_yo_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml new file mode 100644 index 00000000..ceefadf5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml new file mode 100644 index 00000000..ef817a38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml new file mode 100644 index 00000000..a3bae5d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _yo_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml new file mode 100644 index 00000000..b39aa143 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml new file mode 100644 index 00000000..58832982 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml new file mode 100644 index 00000000..21dcf842 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml new file mode 100644 index 00000000..f3abaf24 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml new file mode 100644 index 00000000..0468634b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml new file mode 100644 index 00000000..df6e5844 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml new file mode 100644 index 00000000..0542a4fe --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml new file mode 100644 index 00000000..cce0b497 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml new file mode 100644 index 00000000..84ca1413 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml new file mode 100644 index 00000000..001689e9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml new file mode 100644 index 00000000..dcff962c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml new file mode 100644 index 00000000..6d055d6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml new file mode 100644 index 00000000..c21f7f02 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml new file mode 100644 index 00000000..9b6173f0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml new file mode 100644 index 00000000..2ffc9740 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml new file mode 100644 index 00000000..394a143a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml new file mode 100644 index 00000000..f0de1887 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml new file mode 100644 index 00000000..02b16fae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml new file mode 100644 index 00000000..94733faa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml new file mode 100644 index 00000000..6ec4070e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml new file mode 100644 index 00000000..4ab051d9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml new file mode 100644 index 00000000..bedf7f20 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml new file mode 100644 index 00000000..cb486709 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml new file mode 100644 index 00000000..cea21a89 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml new file mode 100644 index 00000000..a8eae6cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml new file mode 100644 index 00000000..cdaca54f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml new file mode 100644 index 00000000..ef3d7527 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml new file mode 100644 index 00000000..0ec62db0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml new file mode 100644 index 00000000..30c8573c --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml new file mode 100644 index 00000000..52f91d43 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml new file mode 100644 index 00000000..4ab0ec2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml new file mode 100644 index 00000000..f510c2d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml new file mode 100644 index 00000000..9b657110 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_international_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml new file mode 100644 index 00000000..e3ac0a52 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml new file mode 100644 index 00000000..a7a9e718 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml new file mode 100644 index 00000000..4a61d3ae --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_yo_stem_tasks +task: global_mmlu_full_yo_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml new file mode 100644 index 00000000..92b0b526 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_management diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml new file mode 100644 index 00000000..74c17559 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_marketing diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml new file mode 100644 index 00000000..cfc2c8cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml new file mode 100644 index 00000000..ad12bde6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_yo_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml new file mode 100644 index 00000000..2e85331c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml new file mode 100644 index 00000000..9a6a6fc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml new file mode 100644 index 00000000..62d9ae7b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml new file mode 100644 index 00000000..de42ec7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml new file mode 100644 index 00000000..e2ad3236 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml new file mode 100644 index 00000000..198f227b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml new file mode 100644 index 00000000..e5942f74 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml new file mode 100644 
index 00000000..efd4ab7d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml new file mode 100644 index 00000000..e1956c87 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml new file mode 100644 index 00000000..5c6c2b8c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml new file mode 100644 index 00000000..a12c4abd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_yo_social_sciences_tasks +task: 
global_mmlu_full_yo_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml new file mode 100644 index 00000000..e5747900 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_sociology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml new file mode 100644 index 00000000..493dda39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_yo_social_sciences_tasks +task: global_mmlu_full_yo_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml new file mode 100644 index 00000000..420b1b01 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_yo_other_tasks +task: global_mmlu_full_yo_virology diff --git a/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml new file mode 100644 index 00000000..c0964b30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/global_mmlu_full_yo_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_yo_humanities_tasks +task: global_mmlu_full_yo_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/yo/utils.py b/lm_eval/tasks/global_mmlu/full/yo/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/yo/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml new file mode 100644 index 00000000..098ec097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -0,0 +1,11 @@ +group: global_mmlu_full_zh +task: + - global_mmlu_full_zh_stem + - global_mmlu_full_zh_other + - global_mmlu_full_zh_social_sciences + - global_mmlu_full_zh_humanities +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml new file mode 100644 index 00000000..fb347da8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_humanities.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_humanities +task: + - global_mmlu_full_zh_humanities_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml new file mode 100644 index 00000000..98d4ed5e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_other.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_other +task: + - global_mmlu_full_zh_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml new file mode 100644 index 00000000..235012e6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_social_sciences.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_social_sciences +task: + - global_mmlu_full_zh_social_sciences_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml new file mode 100644 index 00000000..660486a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh_stem.yaml @@ -0,0 +1,8 @@ +group: global_mmlu_full_zh_stem +task: + - global_mmlu_full_zh_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml new file mode 100644 index 00000000..2c83d495 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml new file mode 100644 index 00000000..42ea6276 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_abstract_algebra.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_abstract_algebra +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_abstract_algebra diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml new file mode 100644 index 00000000..45001d14 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_anatomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py 
+include: _zh_template_yaml +process_docs: !function utils.process_anatomy +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_anatomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml new file mode 100644 index 00000000..37183dc7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_astronomy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_astronomy +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_astronomy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml new file mode 100644 index 00000000..bbb5ea38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_business_ethics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business_ethics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_business_ethics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml new file mode 100644 index 00000000..d90ee0ea --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_clinical_knowledge.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_clinical_knowledge +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_clinical_knowledge diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml new file mode 100644 index 00000000..ba2031fe --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml new file mode 100644 index 00000000..860761b4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml new file mode 100644 index 00000000..53d01965 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml new file mode 100644 index 00000000..dbd2e4be --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_mathematics diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml new file mode 100644 index 00000000..523d6b30 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_college_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml new file mode 100644 index 00000000..0a08214f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_college_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_college_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_college_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml new file mode 100644 index 00000000..99332b35 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_computer_security.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_computer_security +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_computer_security diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml new file mode 100644 index 00000000..b042cc8b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_conceptual_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_conceptual_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_conceptual_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml new file mode 100644 index 00000000..bf920112 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_econometrics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_econometrics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_econometrics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml new file mode 100644 index 00000000..b30acad7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_electrical_engineering.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_electrical_engineering +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_electrical_engineering diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml new file mode 100644 index 00000000..3b108c42 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_elementary_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_elementary_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_elementary_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml new file mode 100644 index 00000000..64775599 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_formal_logic.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_formal_logic +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_formal_logic diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml new file mode 100644 index 00000000..07d390aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_global_facts.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_global_facts +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_global_facts diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml new file mode 100644 index 00000000..28b2bdaa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_biology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_biology +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_biology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml new file mode 100644 index 00000000..4d084034 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_chemistry.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_chemistry +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_chemistry diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml new file mode 100644 index 00000000..6232ef60 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_computer_science.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_computer_science +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_computer_science diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml new file mode 100644 index 00000000..70e3e52b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_european_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_european_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_european_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml new file mode 100644 index 00000000..fe6cb913 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_geography.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_geography +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_geography diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml new file mode 100644 index 00000000..cfa7213a --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_government_and_politics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_government_and_politics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_government_and_politics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml new file mode 100644 index 00000000..ca0b7ad8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_macroeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_macroeconomics +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_macroeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml new file mode 100644 index 00000000..38868e96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_mathematics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_mathematics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_mathematics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml new file mode 100644 index 00000000..b79237d2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_microeconomics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_microeconomics 
+tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_microeconomics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml new file mode 100644 index 00000000..6355da2f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_physics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_physics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_physics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml new file mode 100644 index 00000000..f2238867 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_high_school_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml new file mode 100644 index 00000000..9aac2097 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_statistics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_statistics +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_high_school_statistics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml new file mode 100644 index 00000000..47d8355f --- 
/dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_us_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_us_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_us_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml new file mode 100644 index 00000000..c1f6671f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_high_school_world_history.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_high_school_world_history +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_high_school_world_history diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml new file mode 100644 index 00000000..d6941ff7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_aging.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_aging +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_human_aging diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml new file mode 100644 index 00000000..ee228b22 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_human_sexuality.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_human_sexuality +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_human_sexuality diff --git 
a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml new file mode 100644 index 00000000..07b1ebd1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_international_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_international_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_international_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml new file mode 100644 index 00000000..ab10ffac --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_jurisprudence.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_jurisprudence +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_jurisprudence diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml new file mode 100644 index 00000000..451260b5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_logical_fallacies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_logical_fallacies +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_logical_fallacies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml new file mode 100644 index 00000000..508d14f6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_machine_learning.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_machine_learning +tag: global_mmlu_full_zh_stem_tasks +task: global_mmlu_full_zh_machine_learning diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml new file mode 100644 index 00000000..9db0b32b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_management.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_management +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_management diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml new file mode 100644 index 00000000..a7142ce4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_marketing.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_marketing +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_marketing diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml new file mode 100644 index 00000000..22053090 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_medical_genetics.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical_genetics +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_medical_genetics diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml new file mode 100644 index 00000000..5b479c9b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_miscellaneous.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: 
_zh_template_yaml +process_docs: !function utils.process_miscellaneous +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_miscellaneous diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml new file mode 100644 index 00000000..58d13a99 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_disputes.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_disputes +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_disputes diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml new file mode 100644 index 00000000..95d91dfd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_moral_scenarios.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_moral_scenarios +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_moral_scenarios diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml new file mode 100644 index 00000000..57452a39 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_nutrition.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_nutrition +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_nutrition diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml new file mode 100644 index 00000000..20e237b2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_philosophy.yaml @@ -0,0 +1,5 
@@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_philosophy +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_philosophy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml new file mode 100644 index 00000000..56358fe7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_prehistory.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_prehistory +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_prehistory diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml new file mode 100644 index 00000000..630681ab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_accounting.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_accounting +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_professional_accounting diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml new file mode 100644 index 00000000..e48f35cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_law.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_law +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_professional_law diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml new file mode 100644 
index 00000000..f75432cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_medicine.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_medicine +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_professional_medicine diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml new file mode 100644 index 00000000..fbbf45ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_professional_psychology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_professional_psychology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_professional_psychology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml new file mode 100644 index 00000000..f760d2a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_public_relations.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_public_relations +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_public_relations diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml new file mode 100644 index 00000000..1dafaf5d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_security_studies.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_security_studies +tag: global_mmlu_full_zh_social_sciences_tasks +task: 
global_mmlu_full_zh_security_studies diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml new file mode 100644 index 00000000..549f4ef1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_sociology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_sociology +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_sociology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml new file mode 100644 index 00000000..597dcfa1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_us_foreign_policy.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_us_foreign_policy +tag: global_mmlu_full_zh_social_sciences_tasks +task: global_mmlu_full_zh_us_foreign_policy diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml new file mode 100644 index 00000000..1984c6b0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_virology.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_virology +tag: global_mmlu_full_zh_other_tasks +task: global_mmlu_full_zh_virology diff --git a/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml new file mode 100644 index 00000000..fa15c0cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/global_mmlu_full_zh_world_religions.yaml @@ -0,0 +1,5 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function 
utils.process_world_religions +tag: global_mmlu_full_zh_humanities_tasks +task: global_mmlu_full_zh_world_religions diff --git a/lm_eval/tasks/global_mmlu/full/zh/utils.py b/lm_eval/tasks/global_mmlu/full/zh/utils.py new file mode 100644 index 00000000..7df72cb0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/full/zh/utils.py @@ -0,0 +1,73 @@ +from functools import partial + + +SUBJECTS = [ + "abstract_algebra", + "anatomy", + "astronomy", + "business_ethics", + "clinical_knowledge", + "college_biology", + "college_chemistry", + "college_computer_science", + "college_mathematics", + "college_medicine", + "college_physics", + "computer_security", + "conceptual_physics", + "econometrics", + "electrical_engineering", + "elementary_mathematics", + "formal_logic", + "global_facts", + "high_school_biology", + "high_school_chemistry", + "high_school_computer_science", + "high_school_european_history", + "high_school_geography", + "high_school_government_and_politics", + "high_school_macroeconomics", + "high_school_mathematics", + "high_school_microeconomics", + "high_school_physics", + "high_school_psychology", + "high_school_statistics", + "high_school_us_history", + "high_school_world_history", + "human_aging", + "human_sexuality", + "international_law", + "jurisprudence", + "logical_fallacies", + "machine_learning", + "management", + "marketing", + "medical_genetics", + "miscellaneous", + "moral_disputes", + "moral_scenarios", + "nutrition", + "philosophy", + "prehistory", + "professional_accounting", + "professional_law", + "professional_medicine", + "professional_psychology", + "public_relations", + "security_studies", + "sociology", + "us_foreign_policy", + "virology", + "world_religions", +] + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["subject"] == subject) + + +process_functions = { + f"process_{subject}": partial(process_docs, subject=subject) for subject in SUBJECTS +} + +globals().update(process_functions) -- GitLab From 
ff2c49ff2b5fae3cdc1bb5fac4f9d8c9b02694b7 Mon Sep 17 00:00:00 2001 From: Gyouk Chu <94156717+GyoukChu@users.noreply.github.com> Date: Tue, 21 Jan 2025 06:05:00 +0900 Subject: [PATCH 04/32] Update KorMedMCQA: ver 2.0 (#2540) * Update KorMedMCQA: ver 2.0 * Fix pre-commit formatting issues * Update KorMedMCQA v2.0 * pre-commit --- lm_eval/tasks/kormedmcqa/README.md | 9 ++++--- lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml | 3 ++- ...{kormedmcqa_doctor.yaml => _template_yaml} | 17 ++++++++---- lm_eval/tasks/kormedmcqa/dentist.yaml | 3 +++ lm_eval/tasks/kormedmcqa/doctor.yaml | 3 +++ .../tasks/kormedmcqa/kormedmcqa_nurse.yaml | 26 ------------------- .../tasks/kormedmcqa/kormedmcqa_pharm.yaml | 26 ------------------- lm_eval/tasks/kormedmcqa/nurse.yaml | 3 +++ lm_eval/tasks/kormedmcqa/pharm.yaml | 3 +++ 9 files changed, 31 insertions(+), 62 deletions(-) rename lm_eval/tasks/kormedmcqa/{kormedmcqa_doctor.yaml => _template_yaml} (62%) create mode 100644 lm_eval/tasks/kormedmcqa/dentist.yaml create mode 100644 lm_eval/tasks/kormedmcqa/doctor.yaml delete mode 100644 lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml delete mode 100644 lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml create mode 100644 lm_eval/tasks/kormedmcqa/nurse.yaml create mode 100644 lm_eval/tasks/kormedmcqa/pharm.yaml diff --git a/lm_eval/tasks/kormedmcqa/README.md b/lm_eval/tasks/kormedmcqa/README.md index b4eb1134..54a666a1 100644 --- a/lm_eval/tasks/kormedmcqa/README.md +++ b/lm_eval/tasks/kormedmcqa/README.md @@ -25,20 +25,21 @@ Homepage: https://huggingface.co/datasets/sean0042/KorMedMCQA ### Groups and Tasks -* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, and `kormedmcqa_pharm`. +* `kormedmcqa`: Runs `kormedmcqa_doctor`, `kormedmcqa_nurse`, `kormedmcqa_pharm`, and `kormedmcqa_dentist`. 
#### Tasks * `kormedmcqa_doctor`: `Official Korean Doctor Examination` * `kormedmcqa_nurse`: `Official Korean Nurse Examination` * `kormedmcqa_pharm`: `Official Korean Pharmacist Examination` +* `kormedmcqa_dentist`: `Official Korean Dentist Examination` ### Checklist For adding novel benchmarks/datasets to the library: -* [x] Is the task an existing benchmark in the literature? - * [x] Have you referenced the original paper that introduced the task? - * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? +* [ ] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? If other tasks on this dataset are already supported: diff --git a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml index d6548334..cac2329e 100644 --- a/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml +++ b/lm_eval/tasks/kormedmcqa/_kormedmcqa.yaml @@ -3,9 +3,10 @@ task: - kormedmcqa_doctor - kormedmcqa_nurse - kormedmcqa_pharm + - kormedmcqa_dentist aggregate_metric_list: - metric: exact_match aggregation: mean weight_by_size: true metadata: - version: 0.0 + version: 2.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml b/lm_eval/tasks/kormedmcqa/_template_yaml similarity index 62% rename from lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml rename to lm_eval/tasks/kormedmcqa/_template_yaml index d130dbe8..1dae2062 100644 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_doctor.yaml +++ b/lm_eval/tasks/kormedmcqa/_template_yaml @@ -1,10 +1,10 @@ -task : kormedmcqa_doctor dataset_path : sean0042/KorMedMCQA -dataset_name : doctor test_split : test -fewshot_split : dev +fewshot_split : fewshot fewshot_config: 
sampler: first_n + doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답: {{['A', 'B', 'C', 'D', 'E'][answer-1]}}\n\n" + doc_to_target: "" output_type: generate_until doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" @@ -15,12 +15,19 @@ metric_list: ignore_case: true ignore_punctuation: true regexes_to_ignore: - - " " + - " " + - "\n" generation_kwargs: until: - "Q:" - - "\n\n" - "" + - "<|im_end|>" - "." + - "\n\n" do_sample: false temperature: 0.0 + max_gen_toks: 1024 +metadata: + version: 2.0 +dataset_kwargs: + trust_remote_code: true diff --git a/lm_eval/tasks/kormedmcqa/dentist.yaml b/lm_eval/tasks/kormedmcqa/dentist.yaml new file mode 100644 index 00000000..6a46c771 --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/dentist.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: dentist +task: kormedmcqa_dentist diff --git a/lm_eval/tasks/kormedmcqa/doctor.yaml b/lm_eval/tasks/kormedmcqa/doctor.yaml new file mode 100644 index 00000000..aac30e4c --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/doctor.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: doctor +task: kormedmcqa_doctor diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml deleted file mode 100644 index 026b6217..00000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_nurse.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_nurse -dataset_path : sean0042/KorMedMCQA -dataset_name : nurse -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. 
{{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." - do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml b/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml deleted file mode 100644 index 91279dd7..00000000 --- a/lm_eval/tasks/kormedmcqa/kormedmcqa_pharm.yaml +++ /dev/null @@ -1,26 +0,0 @@ -task : kormedmcqa_pharm -dataset_path : sean0042/KorMedMCQA -dataset_name : pharm -test_split : test -fewshot_split : dev -fewshot_config: - sampler: first_n -output_type: generate_until -doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nE. {{E}}\n정답:" -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'][answer-1]}}" -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - ignore_case: true - ignore_punctuation: true - regexes_to_ignore: - - " " -generation_kwargs: - until: - - "Q:" - - "\n\n" - - "" - - "." 
- do_sample: false - temperature: 0.0 diff --git a/lm_eval/tasks/kormedmcqa/nurse.yaml b/lm_eval/tasks/kormedmcqa/nurse.yaml new file mode 100644 index 00000000..95894a5d --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/nurse.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: nurse +task: kormedmcqa_nurse diff --git a/lm_eval/tasks/kormedmcqa/pharm.yaml b/lm_eval/tasks/kormedmcqa/pharm.yaml new file mode 100644 index 00000000..8075fae3 --- /dev/null +++ b/lm_eval/tasks/kormedmcqa/pharm.yaml @@ -0,0 +1,3 @@ +include: _template_yaml +dataset_name: pharm +task: kormedmcqa_pharm -- GitLab From 88144079fc949ae58624db7af51beb37119d38c3 Mon Sep 17 00:00:00 2001 From: nike00811 Date: Tue, 21 Jan 2025 05:16:29 +0800 Subject: [PATCH 05/32] fix tmlu tmlu_taiwan_specific_tasks tag (#2420) --- lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml | 2 +- lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml | 2 +- lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml index 965084c8..a810322e 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_driving_rule.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_driving_rule" "task_alias": "driving rule" diff --git a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml index 6a1fc7b2..3fa66f65 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_taiwan_tourist_resources.yaml @@ -9,7 +9,7 @@ D. 
{{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_taiwan_tourist_resources" "task_alias": "taiwan tourist resources" diff --git a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml index 987c2d7d..55e65c87 100644 --- a/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml +++ b/lm_eval/tasks/tmlu/default/tmlu_teacher_qualification.yaml @@ -9,7 +9,7 @@ D. {{choices[3]}}{% if choices is defined and choices|length > 4 %}\nE. {{choices[4]}}{%\ \ endif %}{% if choices is defined and choices|length > 5 %}\nF. {{choices[5]}}{%\ \ endif %}\nAnswer:" -"tag": "tmlu_taiwan_specific" +"tag": "tmlu_taiwan_specific_tasks" "include": "_default_template_yaml" "task": "tmlu_teacher_qualification" "task_alias": "teacher qualification" -- GitLab From 12b6eeb5b01cd1fe9da103e59b85e2c06bb82c93 Mon Sep 17 00:00:00 2001 From: "Ramiro R. C." 
Date: Mon, 20 Jan 2025 18:33:42 -0300 Subject: [PATCH 06/32] fixed mmlu generative response extraction (#2503) * fixed mmlu generative response extraction * updated file version | added args to exact_match * fix * fix * pre-commit * fix groups --------- Co-authored-by: Baber --- lm_eval/tasks/arabicmmlu/_generate_configs.py | 82 ++++++++++--------- lm_eval/tasks/mmlu/_generate_configs.py | 1 + .../mmlu/generative/_default_template_yaml | 16 +++- lm_eval/tasks/mmlu/generative/_mmlu.yaml | 20 ++--- 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/lm_eval/tasks/arabicmmlu/_generate_configs.py b/lm_eval/tasks/arabicmmlu/_generate_configs.py index ea59fe98..5dc627e5 100644 --- a/lm_eval/tasks/arabicmmlu/_generate_configs.py +++ b/lm_eval/tasks/arabicmmlu/_generate_configs.py @@ -13,46 +13,48 @@ from tqdm import tqdm eval_logger = logging.getLogger("lm-eval") -SUBJECTS = {'Islamic Studies': 'humanities', - 'Driving Test': 'other', - 'Natural Science (Middle School)': 'stem', - 'Natural Science (Primary School)': 'stem', - 'History (Primary School)': 'humanities', - 'History (Middle School)': 'humanities', - 'History (High School)': 'humanities', - 'General Knowledge': 'other', - 'General Knowledge (Primary School)': 'other', - 'General Knowledge (Middle School)': 'other', - 'Law (Professional)': 'humanities', - 'Physics (High School)': 'stem', - 'Social Science (Middle School)': 'social_science', - 'Social Science (Primary School)': 'social_science', - 'Management (University)': 'other', - 'Arabic Language (Primary School)': 'language', - 'Arabic Language (Middle School)': 'language', - 'Arabic Language (High School)': 'language', - 'Political Science (University)': 'social_science', - 'Philosophy (High School)': 'humanities', - 'Accounting (University)': 'social_science', - 'Computer Science (University)': 'stem', - 'Computer Science (Middle School)': 'stem', - 'Computer Science (Primary School)': 'stem', - 'Computer Science (High School)': 'stem', - 
'Geography (Primary School)': 'social_science', - 'Geography (Middle School)': 'social_science', - 'Geography (High School)': 'social_science', - 'Math (Primary School)': 'stem', - 'Biology (High School)': 'stem', - 'Economics (University)': 'social_science', - 'Economics (Middle School)': 'social_science', - 'Economics (High School)': 'social_science', - 'Arabic Language (General)': 'language', - 'Arabic Language (Grammar)': 'language', - 'Islamic Studies (High School)': 'humanities', - 'Islamic Studies (Middle School)': 'humanities', - 'Islamic Studies (Primary School)': 'humanities', - 'Civics (Middle School)': 'social_science', - 'Civics (High School)': 'social_science'} +SUBJECTS = { + "Islamic Studies": "humanities", + "Driving Test": "other", + "Natural Science (Middle School)": "stem", + "Natural Science (Primary School)": "stem", + "History (Primary School)": "humanities", + "History (Middle School)": "humanities", + "History (High School)": "humanities", + "General Knowledge": "other", + "General Knowledge (Primary School)": "other", + "General Knowledge (Middle School)": "other", + "Law (Professional)": "humanities", + "Physics (High School)": "stem", + "Social Science (Middle School)": "social_science", + "Social Science (Primary School)": "social_science", + "Management (University)": "other", + "Arabic Language (Primary School)": "language", + "Arabic Language (Middle School)": "language", + "Arabic Language (High School)": "language", + "Political Science (University)": "social_science", + "Philosophy (High School)": "humanities", + "Accounting (University)": "social_science", + "Computer Science (University)": "stem", + "Computer Science (Middle School)": "stem", + "Computer Science (Primary School)": "stem", + "Computer Science (High School)": "stem", + "Geography (Primary School)": "social_science", + "Geography (Middle School)": "social_science", + "Geography (High School)": "social_science", + "Math (Primary School)": "stem", + "Biology (High 
School)": "stem", + "Economics (University)": "social_science", + "Economics (Middle School)": "social_science", + "Economics (High School)": "social_science", + "Arabic Language (General)": "language", + "Arabic Language (Grammar)": "language", + "Islamic Studies (High School)": "humanities", + "Islamic Studies (Middle School)": "humanities", + "Islamic Studies (Primary School)": "humanities", + "Civics (Middle School)": "social_science", + "Civics (High School)": "social_science", +} def parse_args(): diff --git a/lm_eval/tasks/mmlu/_generate_configs.py b/lm_eval/tasks/mmlu/_generate_configs.py index 28b94616..58876d4c 100644 --- a/lm_eval/tasks/mmlu/_generate_configs.py +++ b/lm_eval/tasks/mmlu/_generate_configs.py @@ -1,3 +1,4 @@ +# noqa """ Take in a YAML, and output all "other" splits with this YAML """ diff --git a/lm_eval/tasks/mmlu/generative/_default_template_yaml b/lm_eval/tasks/mmlu/generative/_default_template_yaml index 1452e0f5..7281f0a1 100644 --- a/lm_eval/tasks/mmlu/generative/_default_template_yaml +++ b/lm_eval/tasks/mmlu/generative/_default_template_yaml @@ -14,7 +14,21 @@ metric_list: - metric: exact_match aggregation: mean higher_is_better: true + ignore_punctuation: true + ignore_case: true +filter_list: + - name: get_response + filter: + # Filter everything after the first break line + - function: "regex" + regex_pattern: "^(.*?)(?=\\n|$)" + # Remove leading white spaces + - function: remove_whitespace + # function to ignore right white spaces or line breaks + - function: "regex" + regex_pattern: "^(.*?)\\s*$" + - function: take_first metadata: - version: 2.0 + version: 3.0 dataset_kwargs: trust_remote_code: true diff --git a/lm_eval/tasks/mmlu/generative/_mmlu.yaml b/lm_eval/tasks/mmlu/generative/_mmlu.yaml index 1a63611b..e4f4b5d5 100644 --- a/lm_eval/tasks/mmlu/generative/_mmlu.yaml +++ b/lm_eval/tasks/mmlu/generative/_mmlu.yaml @@ -5,29 +5,29 @@ task: task: - mmlu_stem_generative aggregate_metric_list: - - metric: acc - weight_by_size: 
True + - metric: exact_match + weight_by_size: true - group: other task: - mmlu_other_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: social sciences task: - mmlu_social_sciences_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true - group: humanities task: - mmlu_humanities_generative aggregate_metric_list: - - metric: acc - weight_by_size: True + - metric: exact_match + weight_by_size: true aggregate_metric_list: - aggregation: mean metric: exact_match - weight_by_size: True + weight_by_size: true metadata: - version: 2 + version: 3 -- GitLab From ed9c6fc8db6076cfc86fd1c660fc54c96578eacb Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Wed, 22 Jan 2025 01:46:54 +0900 Subject: [PATCH 07/32] revise mbpp prompt (#2645) --- lm_eval/tasks/mbpp/mbpp.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lm_eval/tasks/mbpp/mbpp.yaml b/lm_eval/tasks/mbpp/mbpp.yaml index 101f1988..a5b58d90 100644 --- a/lm_eval/tasks/mbpp/mbpp.yaml +++ b/lm_eval/tasks/mbpp/mbpp.yaml @@ -4,9 +4,9 @@ dataset_name: full unsafe_code: true output_type: generate_until test_split: test -doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]" +doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n" doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}" -target_delimiter: "\n" +target_delimiter: "" metric_list: - metric: !function utils.pass_at_1 aggregation: mean -- GitLab From b2c090cc971e911c62f6f9a848c20cafb1488ec3 Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Wed, 22 Jan 2025 01:48:22 +0900 
Subject: [PATCH 08/32] aggregate by group (total and categories) (#2643) --- lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml | 3 --- lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml | 11 +++++++++++ .../cot_hard/_kmmlu_cot_hard_applied_science.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml | 8 ++++++++ .../tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml | 8 ++++++++ .../kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml | 3 ++- .../kmmlu_cot_hard_agricultural_sciences.yaml | 3 ++- ...cot_hard_aviation_engineering_and_maintenance.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_chemical_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_civil_engineering.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_computer_science.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_education.yaml | 3 ++- .../kmmlu_cot_hard_electrical_engineering.yaml | 3 ++- .../kmmlu_cot_hard_electronics_engineering.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_energy_management.yaml | 3 ++- .../kmmlu_cot_hard_environmental_science.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_food_processing.yaml | 3 ++- ...kmmlu_cot_hard_gas_technology_and_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_industrial_engineer.yaml | 3 ++- .../kmmlu_cot_hard_information_technology.yaml | 3 ++- ...mlu_cot_hard_interior_architecture_and_design.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml | 3 ++- 
lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml | 3 ++- ...mlu_cot_hard_machine_design_and_manufacturing.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_management.yaml | 3 ++- .../cot_hard/kmmlu_cot_hard_maritime_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml | 3 ++- .../kmmlu_cot_hard_materials_engineering.yaml | 3 ++- lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml | 3 ++- .../kmmlu_cot_hard_mechanical_engineering.yaml | 3 ++- .../kmmlu_cot_hard_nondestructive_testing.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml | 3 ++- ...mmlu_cot_hard_political_science_and_sociology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml | 3 ++- ...u_cot_hard_railway_and_automotive_engineering.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml | 3 ++- .../kmmlu_cot_hard_refrigerating_machinery.yaml | 3 ++- .../kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml | 3 ++- .../tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml | 3 ++- ...rd_telecommunications_and_wireless_technology.yaml | 3 ++- lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml | 3 --- lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml | 11 +++++++++++ .../kmmlu/direct/_kmmlu_direct_applied_science.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml | 8 ++++++++ .../tasks/kmmlu/direct/kmmlu_direct_accounting.yaml | 1 + .../direct/kmmlu_direct_agricultural_sciences.yaml | 1 + ...u_direct_aviation_engineering_and_maintenance.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml | 1 + .../direct/kmmlu_direct_chemical_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml | 1 + .../kmmlu/direct/kmmlu_direct_civil_engineering.yaml | 1 + .../kmmlu/direct/kmmlu_direct_computer_science.yaml | 1 + 
.../tasks/kmmlu/direct/kmmlu_direct_construction.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_economics.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_education.yaml | 1 + .../direct/kmmlu_direct_electrical_engineering.yaml | 1 + .../direct/kmmlu_direct_electronics_engineering.yaml | 1 + .../kmmlu/direct/kmmlu_direct_energy_management.yaml | 1 + .../direct/kmmlu_direct_environmental_science.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml | 1 + .../kmmlu/direct/kmmlu_direct_food_processing.yaml | 1 + .../kmmlu_direct_gas_technology_and_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml | 1 + .../direct/kmmlu_direct_industrial_engineer.yaml | 1 + .../direct/kmmlu_direct_information_technology.yaml | 1 + ...kmmlu_direct_interior_architecture_and_design.yaml | 1 + .../kmmlu/direct/kmmlu_direct_korean_history.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml | 1 + ...kmmlu_direct_machine_design_and_manufacturing.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_management.yaml | 1 + .../direct/kmmlu_direct_maritime_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_marketing.yaml | 1 + .../direct/kmmlu_direct_materials_engineering.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml | 1 + .../direct/kmmlu_direct_mechanical_engineering.yaml | 1 + .../direct/kmmlu_direct_nondestructive_testing.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml | 1 + .../kmmlu_direct_political_science_and_sociology.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_psychology.yaml | 1 + .../kmmlu/direct/kmmlu_direct_public_safety.yaml | 1 + ...mlu_direct_railway_and_automotive_engineering.yaml | 1 + .../tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml | 1 + .../direct/kmmlu_direct_refrigerating_machinery.yaml | 1 + 
.../kmmlu/direct/kmmlu_direct_social_welfare.yaml | 1 + lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml | 1 + ...ct_telecommunications_and_wireless_technology.yaml | 1 + .../tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml | 3 --- .../tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml | 11 +++++++++++ .../_kmmlu_direct_hard_applied_science.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml | 8 ++++++++ .../kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml | 8 ++++++++ .../direct_hard/kmmlu_direct_hard_accounting.yaml | 3 ++- .../kmmlu_direct_hard_agricultural_sciences.yaml | 3 ++- ...ect_hard_aviation_engineering_and_maintenance.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml | 3 ++- .../kmmlu_direct_hard_chemical_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_chemistry.yaml | 3 ++- .../kmmlu_direct_hard_civil_engineering.yaml | 3 ++- .../kmmlu_direct_hard_computer_science.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_construction.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_criminal_law.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_economics.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_education.yaml | 3 ++- .../kmmlu_direct_hard_electrical_engineering.yaml | 3 ++- .../kmmlu_direct_hard_electronics_engineering.yaml | 3 ++- .../kmmlu_direct_hard_energy_management.yaml | 3 ++- .../kmmlu_direct_hard_environmental_science.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml | 3 ++- .../kmmlu_direct_hard_food_processing.yaml | 3 ++- ...lu_direct_hard_gas_technology_and_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_geomatics.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_health.yaml | 3 ++- .../kmmlu_direct_hard_industrial_engineer.yaml | 3 ++- .../kmmlu_direct_hard_information_technology.yaml | 3 ++- ..._direct_hard_interior_architecture_and_design.yaml | 3 ++- 
.../direct_hard/kmmlu_direct_hard_korean_history.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_law.yaml | 3 ++- ..._direct_hard_machine_design_and_manufacturing.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_management.yaml | 3 ++- .../kmmlu_direct_hard_maritime_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_marketing.yaml | 3 ++- .../kmmlu_direct_hard_materials_engineering.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_math.yaml | 3 ++- .../kmmlu_direct_hard_mechanical_engineering.yaml | 3 ++- .../kmmlu_direct_hard_nondestructive_testing.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml | 3 ++- ...u_direct_hard_political_science_and_sociology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_psychology.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_public_safety.yaml | 3 ++- ...irect_hard_railway_and_automotive_engineering.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_real_estate.yaml | 3 ++- .../kmmlu_direct_hard_refrigerating_machinery.yaml | 3 ++- .../direct_hard/kmmlu_direct_hard_social_welfare.yaml | 3 ++- .../kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml | 3 ++- ...rd_telecommunications_and_wireless_technology.yaml | 3 ++- lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml | 6 ------ lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml | 11 +++++++++++ .../tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml | 8 ++++++++ lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml | 1 + .../kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml | 1 + ...mlu_hard_aviation_engineering_and_maintenance.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml | 1 + .../kmmlu/hard/kmmlu_hard_chemical_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml | 1 + .../kmmlu/hard/kmmlu_hard_civil_engineering.yaml | 1 + 
.../tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml | 1 + .../kmmlu/hard/kmmlu_hard_electrical_engineering.yaml | 1 + .../hard/kmmlu_hard_electronics_engineering.yaml | 1 + .../kmmlu/hard/kmmlu_hard_energy_management.yaml | 1 + .../kmmlu/hard/kmmlu_hard_environmental_science.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml | 1 + .../kmmlu_hard_gas_technology_and_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml | 1 + .../kmmlu/hard/kmmlu_hard_industrial_engineer.yaml | 1 + .../kmmlu/hard/kmmlu_hard_information_technology.yaml | 1 + .../kmmlu_hard_interior_architecture_and_design.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml | 1 + .../kmmlu_hard_machine_design_and_manufacturing.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml | 1 + .../kmmlu/hard/kmmlu_hard_maritime_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml | 1 + .../kmmlu/hard/kmmlu_hard_materials_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml | 1 + .../kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml | 1 + .../kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml | 1 + .../kmmlu_hard_political_science_and_sociology.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml | 1 + ...kmmlu_hard_railway_and_automotive_engineering.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml | 1 + 
.../hard/kmmlu_hard_refrigerating_machinery.yaml | 1 + .../tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml | 1 + lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml | 1 + ...rd_telecommunications_and_wireless_technology.yaml | 1 + 204 files changed, 442 insertions(+), 105 deletions(-) create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml create mode 100644 lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml create mode 100644 lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml diff --git a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml index 163a03df..0c0fadf7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml +++ 
b/lm_eval/tasks/kmmlu/cot_hard/_cot_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_cot dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until validation_split: dev # not meant to be used, only here to silence warnings diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml new file mode 100644 index 00000000..1e459a05 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_cot_hard +task: + - kmmlu_cot_hard_stem + - kmmlu_cot_hard_other + - kmmlu_cot_hard_applied_science + - kmmlu_cot_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml new file mode 100644 index 00000000..4944cefb --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_applied_science +task: + - kmmlu_cot_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml new file mode 100644 index 00000000..7b30f358 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_humss +task: + - kmmlu_cot_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml new file mode 100644 index 00000000..70329cf4 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_other +task: + - kmmlu_cot_hard_other_tasks +aggregate_metric_list: + - metric: 
exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml new file mode 100644 index 00000000..65d92fe2 --- /dev/null +++ b/lm_eval/tasks/kmmlu/cot_hard/_kmmlu_cot_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_cot_hard_stem +task: + - kmmlu_cot_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml index bb17436e..0a89dce5 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_accounting.yaml @@ -78,4 +78,5 @@ fewshot_config: 당기순이익은 과소 계상됩니다. 왜냐하면 매출원가가 더 높아지면 이익은 줄어들기 때문입니다. , 상품재고액을 과대 계상한 경우 매출원가는 과대 계상되고, 당기순이익은 과소 계상됩니다. '따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_accounting +task: kmmlu_cot_hard_accounting +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml index b100094b..d3ab5734 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_agricultural_sciences.yaml @@ -80,4 +80,5 @@ fewshot_config: 각 선택지를 분석한 결과 (C) 선택지인 '감자의 바이러스 병을 막기 위해 평지에서 채종한다.'가 가장 잘못된 방법으로 보입니다. 이는 감자의 바이러스 병 예방과 평지에서의 채종 사이에 직접적인 연관성이 없기 때문입니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_agricultural_sciences +task: kmmlu_cot_hard_agricultural_sciences +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml index f9cd217f..dcc59f88 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml @@ -85,4 +85,5 @@ fewshot_config: (D) 옆놀이의 안정성 향상을 위해서는 트위스트가 중요한 역할을 합니다. 트위스트는 날개 팁 부분의 각도를 조절하여, 항공기가 고속에서도 안정적으로 비행할 수 있도록 돕습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_aviation_engineering_and_maintenance +task: kmmlu_cot_hard_aviation_engineering_and_maintenance +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml index 4d6e52b7..52e0c77d 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_biology.yaml @@ -80,4 +80,5 @@ fewshot_config: 없어야 합니다. 이러한 조건을 충족하는 미생물은 절대호산성 미생물입니다. 절대호산성 미생물은 극도로 산성 환경에서만 생존할 수 있으며, 중성 또는 알칼리성 환경에서는 성장할 수 없습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_biology +task: kmmlu_cot_hard_biology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml index 9b7435d3..49ebe866 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemical_engineering.yaml @@ -87,4 +87,5 @@ fewshot_config: 압력, V는 부피입니다. W = -P1Vln(P2/P1) = -(10×10^5 Pa)(0.05m^3)ln((1×10^5 Pa)/(10×10^5 Pa)) = 0입니다. 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemical_engineering +task: kmmlu_cot_hard_chemical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml index d761f5e2..0cfd1dff 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_chemistry.yaml @@ -76,4 +76,5 @@ fewshot_config: 황산의 분자량은 98g/mol입니다. 황산의 몰 수는 49g ÷ 98g/mol = 0.5mol입니다. 이 수용액의 물 농도는 0.5mol/1L = 0.5M입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_chemistry +task: kmmlu_cot_hard_chemistry +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml index 87d3d22e..13893796 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_civil_engineering.yaml @@ -97,4 +97,5 @@ fewshot_config: 것이며, 이 계약은 미국의 근대도시계획 성립기에 지역제의 바탕이 된 제도는 (A) 협약(covenant)이 가장 적절한 선택입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_civil_engineering +task: kmmlu_cot_hard_civil_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml index 463b8e75..f8399409 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_computer_science.yaml @@ -96,4 +96,5 @@ fewshot_config: 주어진 설명에서 언급된 감사 추적(Auditing)이나 Shadow Password와 같은 부가적인 기능보다는 사용자 간 침범 차단과 사용자별 파일 권한 설정에 초점을 맞춘 것으로 정의됩니다. 따라서, 정답은 (B) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_computer_science +task: kmmlu_cot_hard_computer_science +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml index a277f637..3cfb3e9f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_construction.yaml @@ -83,4 +83,5 @@ fewshot_config: 압축비가 9입니다. 이를 식에 대입하여 연소실 체적을 계산해 보겠습니다. 행정체적 = 240 압축비 = 9 연소실_체적 = 행정체적 / (압축비 - 1) = 240 / 8 = 30 연소실의 체적은 30cc입니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_construction +task: kmmlu_cot_hard_construction +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml index fa46f0f4..559ff679 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_criminal_law.yaml @@ -106,4 +106,5 @@ fewshot_config: 고려에 따라 변경된 경우에 형법 제1조 제2항이 적용되는 것은 맞지만, 법령의 변경이 있더라도 그것이 반성적 고려에서 비롯된 것이 아니라면 형법 제1조 제2항이 적용되지 않습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_criminal_law +task: kmmlu_cot_hard_criminal_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml index 0da8a9a7..0d59eaa7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_ecology.yaml @@ -88,4 +88,5 @@ fewshot_config: 질소는 질소 가스나 암모니아의 형태로 존재합니다. , 위의 분석에 따르면, (B) 옵션의 설명이 잘못되었습니다. 아연과 코발트는 일반적으로 미량원소로 분류되지만, 이 설명에서는 그들을 다량 원소로 잘못 분류하고 있습니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_ecology +task: kmmlu_cot_hard_ecology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml index baaa3bd9..838e0bd5 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_economics.yaml @@ -87,4 +87,5 @@ fewshot_config: - 7,000,000원)은 어떻게 처리되었을까요? 이 금액은 회사의 이익에서 차감되어야 합니다. 이는 이익잉여금, 즉 회사가 벌어들인 이익에서 퇴직금 등을 지급한 후 남은 금액이 감소한다는 의미입니다. 선택지 D는 옳지 않습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_economics +task: kmmlu_cot_hard_economics +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml index 0e68a0b0..899c38a8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_education.yaml @@ -81,4 +81,5 @@ fewshot_config: 수 있습니다. 하지만, 평생교육법에는 전국평생학습도시협의회의 구성 및 운영에 필요한 사항은 교육부령으로 정한다는 내용은 없습니다. 이를 통해 선택지 (B)가 평생교육법 상평생학습도시에 대한 설명으로 옳지 않다는 것을 알 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_education +task: kmmlu_cot_hard_education +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml index 3b5af8b7..f68f2ca7 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electrical_engineering.yaml @@ -93,4 +93,5 @@ fewshot_config: Pyrometer)입니다. 이는 흑체 또는 비슷한 조건의 물체로부터 방사되는 가시광선의 강도를 통해 온도를 측정하는 방식을 사용하며, 흑체의 방사율을 1로 설정하여 보정하는 원리를 기반으로 합니다. 따라서, 정답은 (A) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electrical_engineering +task: kmmlu_cot_hard_electrical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml index 4d132703..ce9ce0e3 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_electronics_engineering.yaml @@ -79,4 +79,5 @@ fewshot_config: 합니다. 마지막으로 선택지 (D)는 컴퓨터 시스템의 하드웨어 오류를 발견하고 그에 대한 적절한 조치를 한다는 내용입니다. 이 역시 운영체제의 기능으로, 하드웨어 오류를 감지하고 적절한 처리를 하여 시스템의 안정성을 유지하는 역할을 합니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_electronics_engineering +task: kmmlu_cot_hard_electronics_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml index 8dca183a..0c5e18b1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_energy_management.yaml @@ -85,4 +85,5 @@ fewshot_config: 요인이 아닙니다. , 태양광발전 모듈의 I-V 특성곡선에서 일사량에 따라 가장 많이 변화하는 것은 전류입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_energy_management +task: kmmlu_cot_hard_energy_management +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml index d9080b07..47de0dca 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_environmental_science.yaml @@ -82,4 +82,5 @@ fewshot_config: 전통적인 구성요소는 아닙니다. 과정분석은 보다 일반적인 용어로, 다양한 맥락에서 사용될 수 있습니다. (D) 목록분석 (Inventory Analysis): 이 역시 LCA의 핵심 단계 중 하나입니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_environmental_science +task: kmmlu_cot_hard_environmental_science +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml index 983a6590..598aad05 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_fashion.yaml @@ -84,4 +84,5 @@ fewshot_config: 수선 등을 포함한 종합적인 서비스를 제공하는 것으로 보입니다. 이는 일반적인 클리닝 서비스와는 차별화된 서비스라고 볼 수 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_fashion +task: kmmlu_cot_hard_fashion +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml index 2d3473f0..3cbec3d8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_food_processing.yaml @@ -78,4 +78,5 @@ fewshot_config: 이 품종은 상대적으로 높은 온도에 더 민감하게 반응하며, 일장의 변화에는 덜 민감한 특성을 가지고 있어 한국의 기후 특성에서 효과적으로 성장할 수 있는 조건을 가지고 있습니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_food_processing +task: kmmlu_cot_hard_food_processing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml index a244b955..49551077 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_gas_technology_and_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 이들은 모두 환경에 해롭습니다. 물은 염소 가스의 재해 방지용으로서의 흡수제나 재해제로서 적합하지 않습니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_gas_technology_and_engineering +task: kmmlu_cot_hard_gas_technology_and_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml index cfc4866a..961b20ce 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_geomatics.yaml @@ -75,4 +75,5 @@ fewshot_config: 공식은 실제 거리의 제곱근에 축척분모를 곱한 값이 측정된 면적이 될 것입니다. 이렇게 보면, 공식이 의미하는 바를 잘 나타내는 것 같습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_geomatics +task: kmmlu_cot_hard_geomatics +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml index d5b47791..29a432e4 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_health.yaml @@ -85,4 +85,5 @@ fewshot_config: 일반적으로는 사업장에서 자체적으로 실시하는 것이 일반적입니다. , 국민건강증진기금의 사용 범위에는 포함되지 않을 수 있습니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_health +task: kmmlu_cot_hard_health +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml index fc8f3dec..f087d221 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_industrial_engineer.yaml @@ -82,4 +82,5 @@ fewshot_config: 절삭저항의 대부분을 차지합니다. 이러한 정보를 바탕으로, 주분력이 절삭저항의 대부분을 차지하므로, 탄소강을 가공할 때 가장 큰 절삭저항을 주는 것은 주분력일 것이라고 추론할 수 있습니다. 따라서, 정답은 (D) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_industrial_engineer +task: kmmlu_cot_hard_industrial_engineer +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml index aa557282..3d6fd3ce 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_information_technology.yaml @@ -80,4 +80,5 @@ fewshot_config: 피어스 B-E형 발진 회로에서는 컬렉터-이미터 간의 임피던스가 유도성일 때 가장 안정한 발진을 지속할 수 있습니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_information_technology +task: kmmlu_cot_hard_information_technology +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml index 2b3849d7..84a61a70 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_interior_architecture_and_design.yaml @@ -92,4 +92,5 @@ fewshot_config: 지칭하지 않으며, 실제 설계 및 계획 과정에서는 보통 최소값, 최대값, 또는 목표값과 같이 더 구체적이고 명확한 기준을 바탕으로 최적치수를 결정합니다. 따라서, 정답은 (C) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_interior_architecture_and_design +task: kmmlu_cot_hard_interior_architecture_and_design +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml index 7498080c..18666a70 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_korean_history.yaml @@ -97,4 +97,5 @@ fewshot_config: 군사 기구로, 흥선대원군은 왕권 강화를 위해 비변사의 기능을 약화시켰습니다. (D) 통상 수교 거부 정책을 추진하였다 - 흥선대원군은 외세의 침략을 막기 위해 통상 수교 거부 정책을 추진하였습니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_korean_history +task: kmmlu_cot_hard_korean_history +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml index 0328a010..ddbd97b6 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_law.yaml @@ -81,4 +81,5 @@ fewshot_config: 중 하나입니다. (D) 네트워크 취약성으로 발생하는 문제는 물리적 통제절차의 개선으로 해결해야 한다는 것은, 네트워크 보안 문제를 해결하기 위해 물리적인 통제 절차를 개선하는 것입니다. 이는 네트워크 보안을 강화하는 데 매우 중요한 역할을 합니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_law +task: kmmlu_cot_hard_law +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml index 4c6207bb..d1e0d88b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_machine_design_and_manufacturing.yaml @@ -83,4 +83,5 @@ fewshot_config: 선택지는 해칭이 주된 중심선 또는 단면도의 주된 외형선에 대하여 90℃ 기울기로 그린다는 내용인데, 이는 잘못된 내용입니다. 일반적으로 해칭은 45도 기울기로 그려집니다. , 이 선택지는 해칭의 일반적인 원칙을 잘못 설명하고 있습니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_machine_design_and_manufacturing +task: kmmlu_cot_hard_machine_design_and_manufacturing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml index 11628904..435d762f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_management.yaml @@ -76,4 +76,5 @@ fewshot_config: 각 부문별로 목표를 정하고 분산된 시스템을 구축하는 것은 물류 시스템의 효율성을 높일 수 있지만, 이는 통합적인 관리가 어려울 수 있습니다. 따라서, 정답은 (B) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_management +task: kmmlu_cot_hard_management +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml index e168371f..bb7103eb 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_maritime_engineering.yaml @@ -98,4 +98,5 @@ fewshot_config: (D) 아르곤: 아르곤도 불활성 기체로, 지방질에 용해되거나 마취 효과를 나타내지 않습니다. 아르곤은 주로 산업 공정에서 보호 가스로 사용됩니다. 따라서, 정답은 (B) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_maritime_engineering +task: kmmlu_cot_hard_maritime_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml index 240d92a2..971a106b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_marketing.yaml @@ -91,4 +91,5 @@ fewshot_config: 있으며, 상담원이 고객의 반론에 대한 자연스러운 대응력을 갖추면 고객의 불만이나 반대를 효과적으로 처리할 수 있습니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_marketing +task: kmmlu_cot_hard_marketing +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml index 59774a15..4f5867e2 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_materials_engineering.yaml @@ -84,4 +84,5 @@ fewshot_config: 구별하는 데 사용될 수 있습니다. 냉간가공은 재결성 온도 이하에서 이루어지며, 열간가공은 재결성 온도 이상에서 이루어집니다. , 냉간가공과 열간가공을 구별하는 기준은 재결성 온도라고 할 수 있습니다. 따라서, 정답은 (C) 입니다.' 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_materials_engineering +task: kmmlu_cot_hard_materials_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml index 103bc573..5aa474d2 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_math.yaml @@ -95,4 +95,5 @@ fewshot_config: + ω2019 입니다. , ω^2017 + ω^2019 = ω + 1 입니다. 주어진 식에 ω + 1을 대입하면 ω + 1 + ω + 1 + 1 + 1이 됩니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_math +task: kmmlu_cot_hard_math +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml index a57d0661..8d99ba72 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_mechanical_engineering.yaml @@ -77,4 +77,5 @@ fewshot_config: 어떤 것일까요? V벨트의 단면 크기는 알파벳이 뒤로 갈수록 커집니다 즉, A형은 B형보다 작고, B형은 C형보다 작으며, 이런 식으로 D형, E형으로 진행됩니다. , 주어진 선택지 중에서 가장 단면이 큰 V벨트는 E형일 것입니다. 따라서, 정답은 (C) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_mechanical_engineering +task: kmmlu_cot_hard_mechanical_engineering +tag: kmmlu_cot_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml index c7ecea17..656b08ac 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_nondestructive_testing.yaml @@ -91,4 +91,5 @@ fewshot_config: 시험체의 두께 t를 계산하면 다음과 같습니다. t = v / (2f) = 4800 / (2 * 2 * 10^6) = 0.0012m = 1.2mm 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_nondestructive_testing +task: kmmlu_cot_hard_nondestructive_testing +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml index 1e5607a5..30b60825 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_patent.yaml @@ -110,4 +110,5 @@ fewshot_config: 발명에 대해서는 먼저 출원한 자만이 특허를 받을 수 있다고 규정하고 있으므로, 乙은 특허를 받을 수 없습니다. , (D)는 옳은 설명입니다. 따라서, 정답은 (A) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_patent +task: kmmlu_cot_hard_patent +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml index 50c159f9..7d8c4e56 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_political_science_and_sociology.yaml @@ -88,4 +88,5 @@ fewshot_config: 범위에서도 활용되는 전략입니다. 도시의 이미지를 국제적으로 홍보하고, 외국인 투자자나 관광객을 유치하는 것이 도시마케팅의 일부이기 때문입니다. 도시마케팅의 공간적 범위가 국내로만 한정되어 있다는 것은 잘못된 설명입니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_political_science_and_sociology +task: kmmlu_cot_hard_political_science_and_sociology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml index f86d14e6..125befe1 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_psychology.yaml @@ -95,4 +95,5 @@ fewshot_config: 이러한 분석을 통해 고급 상담자의 특징은 (C) 내담자에게 의도적으로 주의를 기울이고 중요한 정보를 수집하고 인식할 수 있다는 것으로 보입니다. 이는 상담자의 기본적인 역량을 넘어서서 고급 상담자가 갖추어야 할 능력으로 보입니다. 따라서, 정답은 (C) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_psychology +task: kmmlu_cot_hard_psychology +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml index 5cc5c148..5627770b 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_public_safety.yaml @@ -90,4 +90,5 @@ fewshot_config: 산업안전ᆞ보건과 관련된 그 밖의 사항 , 선택지 중에서 산업안전보건위원회의 심의ᆞ의결을 거치지 않아도 되는 사항은 (B) 안전ᆞ보건과 관련된 안전장치 구입 시의 적격품 여부 확인에 관한 사항입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_public_safety +task: kmmlu_cot_hard_public_safety +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml index c81e158a..5b8b436f 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_railway_and_automotive_engineering.yaml @@ -85,4 +85,5 @@ fewshot_config: 위한 것입니다. (D) 기관의 과냉 및 소음방지를 위해 일정 회전수 이상 시 슬립 발생: 유체 커플링식 냉각 팬은 기관의 과냉 및 소음 방지를 위해 일정 회전수 이상 시 슬립이 발생합니다. 이는 유체 커플링의 특성 때문입니다. 따라서, 정답은 (A) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_railway_and_automotive_engineering +task: kmmlu_cot_hard_railway_and_automotive_engineering +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml index 0e7d8100..38df4312 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_real_estate.yaml @@ -88,4 +88,5 @@ fewshot_config: 따르면 개업공인중개사는 등록한 관할구역 외의 지역에 있는 중개대상물을 중개할 수 있습니다. 이 내용은 잘못된 내용입니다. 따라서, 정답은 (D) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_real_estate +task: kmmlu_cot_hard_real_estate +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml index 7c3984e4..10624f2a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_refrigerating_machinery.yaml @@ -88,4 +88,5 @@ fewshot_config: = 200.15K입니다. 그러므로, W = 1kJ * (300.15K - 200.15K) / 200.15K = 0.5kJ입니다. 따라서, 정답은 (D) 입니다.' include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_refrigerating_machinery +task: kmmlu_cot_hard_refrigerating_machinery +tag: kmmlu_cot_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml index d19fb511..64e6fb5a 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_social_welfare.yaml @@ -90,4 +90,5 @@ fewshot_config: 이는 사회복지정책의 본질적인 목표와 원칙을 반영하지 못하고 있습니다. 사회복지정책은 능력이 아닌 필요에 따라 지원을 하는 것이 원칙이며, 이를 통해 사회적 불평등을 해소하고 모든 사람이 기본적인 생활을 유지할 수 있도록 지원하는 것이 목표입니다. 따라서, 정답은 (B) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_social_welfare +task: kmmlu_cot_hard_social_welfare +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml index 937a864e..fbf88067 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_taxation.yaml @@ -104,4 +104,5 @@ fewshot_config: 국가의 안전보장 목적의 수행상 긴요하다고 인정하여 수입하는 물품을 의미합니다. 이 또한 국가의 안전보장을 위해 필요한 물품이므로 면세 대상에 해당할 것으로 보입니다. 따라서, 정답은 (A) 입니다. 
include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_taxation +task: kmmlu_cot_hard_taxation +tag: kmmlu_cot_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml index ca23afc0..54c5aac8 100644 --- a/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/cot_hard/kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml @@ -83,4 +83,5 @@ fewshot_config: 증가하면, 전자기파의 세기는 1/r^2배 감소합니다. , 거리가 2배가 되면, 전자기파의 세기는 1/4배가 됩니다. 그리고 전력 밀도는 전기장과 자기장의 제곱에 비례하므로, 거리가 2배가 되면 전력 밀도는 1/4배가 됩니다. 따라서, 정답은 (D) 입니다. include: _cot_kmmlu_yaml -task: kmmlu_hard_cot_telecommunications_and_wireless_technology +task: kmmlu_cot_hard_telecommunications_and_wireless_technology +tag: kmmlu_cot_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml index a0c8dfdc..1ecb5fba 100644 --- a/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/direct/_direct_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_direct dataset_path: HAERAE-HUB/KMMLU output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml new file mode 100644 index 00000000..9763d3d4 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct +task: + - kmmlu_direct_stem + - kmmlu_direct_other + - kmmlu_direct_applied_science + - kmmlu_direct_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml new file mode 100644 index 00000000..78937b3f --- /dev/null +++ 
b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_applied_science +task: + - kmmlu_direct_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml new file mode 100644 index 00000000..1c8e4f20 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_humss +task: + - kmmlu_direct_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml new file mode 100644 index 00000000..eb5166ec --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_other +task: + - kmmlu_direct_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml new file mode 100644 index 00000000..932cc1e5 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct/_kmmlu_direct_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_stem +task: + - kmmlu_direct_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml index d7736e8d..d61a84b8 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: Accounting include: _direct_kmmlu_yaml task: kmmlu_direct_accounting +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml index 5bf1fa4b..a8a2829b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: Agricultural-Sciences include: _direct_kmmlu_yaml task: kmmlu_direct_agricultural_sciences +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml index a9a62193..d383834f 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: Aviation-Engineering-and-Maintenance include: _direct_kmmlu_yaml task: kmmlu_direct_aviation_engineering_and_maintenance +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml index ebe1765b..aeeb1e52 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml @@ -1,3 +1,4 @@ dataset_name: Biology include: _direct_kmmlu_yaml task: kmmlu_direct_biology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml index e5875bb7..921073d5 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Chemical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_chemical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml index edabfb67..afa5b4b2 
100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: Chemistry include: _direct_kmmlu_yaml task: kmmlu_direct_chemistry +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml index 98ed98dd..b8c5064b 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Civil-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_civil_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml index c546e738..bac82f1f 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: Computer-Science include: _direct_kmmlu_yaml task: kmmlu_direct_computer_science +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml index a0af2a16..8cb9ada9 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml @@ -1,3 +1,4 @@ dataset_name: Construction include: _direct_kmmlu_yaml task: kmmlu_direct_construction +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml index 9dfdfabc..642a88bc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: Criminal-Law include: _direct_kmmlu_yaml task: kmmlu_direct_criminal_law +tag: 
kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml index 9d182903..dffbb3c4 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: Ecology include: _direct_kmmlu_yaml task: kmmlu_direct_ecology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml index db4d7840..1fc5d2c3 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml @@ -1,3 +1,4 @@ dataset_name: Economics include: _direct_kmmlu_yaml task: kmmlu_direct_economics +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml index 74887e76..dc151c87 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml @@ -1,3 +1,4 @@ dataset_name: Education include: _direct_kmmlu_yaml task: kmmlu_direct_education +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml index 3455d507..208e7b16 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electrical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electrical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml index b45aa308..0a61e3d1 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Electronics-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_electronics_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml index b4fb806b..085f4246 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: Energy-Management include: _direct_kmmlu_yaml task: kmmlu_direct_energy_management +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml index 1670ff16..104a4b9e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: Environmental-Science include: _direct_kmmlu_yaml task: kmmlu_direct_environmental_science +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml index aef8043a..561e565c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: Fashion include: _direct_kmmlu_yaml task: kmmlu_direct_fashion +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml index f49b087f..3050c82a 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: Food-Processing include: _direct_kmmlu_yaml task: kmmlu_direct_food_processing 
+tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml index 00b7021c..708e76d8 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Gas-Technology-and-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_gas_technology_and_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml index 5d8dc70d..0937bcfc 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: Geomatics include: _direct_kmmlu_yaml task: kmmlu_direct_geomatics +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml index 3f0d77eb..70ef5736 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml @@ -1,3 +1,4 @@ dataset_name: Health include: _direct_kmmlu_yaml task: kmmlu_direct_health +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml index 39ea0bcf..14545201 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: Industrial-Engineer include: _direct_kmmlu_yaml task: kmmlu_direct_industrial_engineer +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml index c42e80ed..50fc6e91 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Information-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_information_technology +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml index 842534aa..638de434 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: Interior-Architecture-and-Design include: _direct_kmmlu_yaml task: kmmlu_direct_interior_architecture_and_design +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml index f1aa277a..6d6b20ba 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: Korean-History include: _direct_kmmlu_yaml task: kmmlu_direct_korean_history +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml index 602f8982..29685852 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml @@ -1,3 +1,4 @@ dataset_name: Law include: _direct_kmmlu_yaml task: kmmlu_direct_law +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml index bfb923c2..587d25d0 100644 --- 
a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: Machine-Design-and-Manufacturing include: _direct_kmmlu_yaml task: kmmlu_direct_machine_design_and_manufacturing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml index 7352a136..aec441bb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml @@ -1,3 +1,4 @@ dataset_name: Management include: _direct_kmmlu_yaml task: kmmlu_direct_management +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml index fa0c8f31..e7e1f12e 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Maritime-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_maritime_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml index c3b524d8..10dadc00 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: Marketing include: _direct_kmmlu_yaml task: kmmlu_direct_marketing +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml index f04e0975..d0463266 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml @@ -1,3 +1,4 @@ 
dataset_name: Materials-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_materials_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml index 6c5d28af..20d17c01 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml @@ -1,3 +1,4 @@ dataset_name: Math include: _direct_kmmlu_yaml task: kmmlu_direct_math +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml index a253535a..3ddb2796 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Mechanical-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_mechanical_engineering +tag: kmmlu_direct_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml index 3b8dc7e7..3e37bd1c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: Nondestructive-Testing include: _direct_kmmlu_yaml task: kmmlu_direct_nondestructive_testing +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml index 2afff2c3..e829b995 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml @@ -1,3 +1,4 @@ dataset_name: Patent include: _direct_kmmlu_yaml task: kmmlu_direct_patent +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml index 2209abbf..adf6c1b7 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: Political-Science-and-Sociology include: _direct_kmmlu_yaml task: kmmlu_direct_political_science_and_sociology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml index 140302d0..a8ccfcbd 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: Psychology include: _direct_kmmlu_yaml task: kmmlu_direct_psychology +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml index 5bb16a90..5926a45c 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: Public-Safety include: _direct_kmmlu_yaml task: kmmlu_direct_public_safety +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml index 2a13204a..fa92c9fb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: Railway-and-Automotive-Engineering include: _direct_kmmlu_yaml task: kmmlu_direct_railway_and_automotive_engineering +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml index 
5a5202b6..e8872a53 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: Real-Estate include: _direct_kmmlu_yaml task: kmmlu_direct_real_estate +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml index 44f9e428..73787390 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: Refrigerating-Machinery include: _direct_kmmlu_yaml task: kmmlu_direct_refrigerating_machinery +tag: kmmlu_direct_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml index fa13bdff..52f731fb 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: Social-Welfare include: _direct_kmmlu_yaml task: kmmlu_direct_social_welfare +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml index 69e71d6d..caa0d798 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: Taxation include: _direct_kmmlu_yaml task: kmmlu_direct_taxation +tag: kmmlu_direct_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml index f4d1fd05..8f98b1d4 100644 --- a/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct/kmmlu_direct_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: Telecommunications-and-Wireless-Technology include: _direct_kmmlu_yaml task: kmmlu_direct_telecommunications_and_wireless_technology +tag: kmmlu_direct_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml index 3cf63592..f5ed0fda 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/_direct_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard_direct dataset_path: HAERAE-HUB/KMMLU-HARD output_type: generate_until test_split: test diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml new file mode 100644 index 00000000..54206cdb --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_direct_hard +task: + - kmmlu_direct_hard_stem + - kmmlu_direct_hard_other + - kmmlu_direct_hard_applied_science + - kmmlu_direct_hard_humss +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml new file mode 100644 index 00000000..0f70ae13 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_applied_science +task: + - kmmlu_direct_hard_applied_science_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml new file mode 100644 index 00000000..b28fdd15 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_humss.yaml 
@@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_humss +task: + - kmmlu_direct_hard_humss_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml new file mode 100644 index 00000000..f216caa6 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_other +task: + - kmmlu_direct_hard_other_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml new file mode 100644 index 00000000..026c6b48 --- /dev/null +++ b/lm_eval/tasks/kmmlu/direct_hard/_kmmlu_direct_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_direct_hard_stem +task: + - kmmlu_direct_hard_stem_tasks +aggregate_metric_list: + - metric: exact_match + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml index ca805e95..d92b933d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_accounting +task: kmmlu_direct_hard_accounting +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml index 73483444..d78427d0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: 
agricultural_sciences include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_agricultural_sciences +task: kmmlu_direct_hard_agricultural_sciences +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml index 25c91cb6..6713f04d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_aviation_engineering_and_maintenance +task: kmmlu_direct_hard_aviation_engineering_and_maintenance +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml index a7bc8417..e98a380f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml @@ -1,3 +1,4 @@ dataset_name: biology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_biology +task: kmmlu_direct_hard_biology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml index 063974af..b505e317 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemical_engineering +task: kmmlu_direct_hard_chemical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml index 371db7bf..d805e234 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_chemistry +task: kmmlu_direct_hard_chemistry +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml index ba2c23b2..30622d50 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: civil_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_civil_engineering +task: kmmlu_direct_hard_civil_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml index 2a388ff4..bc0f5a37 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_computer_science +task: kmmlu_direct_hard_computer_science +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml index faab391b..e050e106 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _direct_hard_kmmlu_yaml -task: 
kmmlu_hard_direct_construction +task: kmmlu_direct_hard_construction +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml index d2679f1e..3072b6f0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_criminal_law +task: kmmlu_direct_hard_criminal_law +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml index adedf9d6..3129f467 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_ecology +task: kmmlu_direct_hard_ecology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml index f42e5b8d..87069840 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml @@ -1,3 +1,4 @@ dataset_name: economics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_economics +task: kmmlu_direct_hard_economics +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml index 9c90432f..75baa136 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _direct_hard_kmmlu_yaml 
-task: kmmlu_hard_direct_education +task: kmmlu_direct_hard_education +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml index 780dad22..789cdfb8 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_electrical_engineering +task: kmmlu_direct_hard_electrical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml index e0178154..9a1736e0 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_electronics_engineering +task: kmmlu_direct_hard_electronics_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml index d4c2ca7d..4653272e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_energy_management +task: kmmlu_direct_hard_energy_management +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml index de511a09..60c0253e 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_environmental_science +task: kmmlu_direct_hard_environmental_science +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml index 26f0617d..86bbb9b4 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_fashion +task: kmmlu_direct_hard_fashion +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml index e48143d2..6b2817d2 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_food_processing +task: kmmlu_direct_hard_food_processing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml index eb5211ad..c2d2f477 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: 
_direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_gas_technology_and_engineering +task: kmmlu_direct_hard_gas_technology_and_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml index a25f3c1a..9dadc72d 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: geomatics include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_geomatics +task: kmmlu_direct_hard_geomatics +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml index 0fef809e..f1bf4c77 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_health +task: kmmlu_direct_hard_health +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml index d7ca26e5..5f7b73ea 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_industrial_engineer +task: kmmlu_direct_hard_industrial_engineer +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml index 0f8d01ec..a1c5cf9d 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_information_technology +task: kmmlu_direct_hard_information_technology +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml index 3b130381..65a20727 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_interior_architecture_and_design +task: kmmlu_direct_hard_interior_architecture_and_design +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml index c4d595d1..c10a9f57 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_korean_history +task: kmmlu_direct_hard_korean_history +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml index 168f0340..96e5514f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_law +task: kmmlu_direct_hard_law +tag: 
kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml index 73665b1b..50dfd63b 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_machine_design_and_manufacturing +task: kmmlu_direct_hard_machine_design_and_manufacturing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml index 6eb945d2..48c339d7 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_management +task: kmmlu_direct_hard_management +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml index 4078cf97..937bfd27 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_maritime_engineering +task: kmmlu_direct_hard_maritime_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml index 37d62bb1..1ae4088a 100644 --- 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_marketing +task: kmmlu_direct_hard_marketing +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml index c1e2645c..432460eb 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_materials_engineering +task: kmmlu_direct_hard_materials_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml index f5f3373a..53d2fca1 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_math +task: kmmlu_direct_hard_math +tag: kmmlu_direct_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml index dae55511..1a3994ea 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_mechanical_engineering +task: kmmlu_direct_hard_mechanical_engineering +tag: kmmlu_direct_hard_stem_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml index 3ff95837..909c502c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_nondestructive_testing +task: kmmlu_direct_hard_nondestructive_testing +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml index d913752b..d8faf972 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_patent +task: kmmlu_direct_hard_patent +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml index 8a5d96b6..0b650507 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_political_science_and_sociology +task: kmmlu_direct_hard_political_science_and_sociology +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml index 9fbf0d31..b1a6f777 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_psychology +task: kmmlu_direct_hard_psychology +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml index b376c4eb..3da46294 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_public_safety +task: kmmlu_direct_hard_public_safety +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml index 0eb534e5..74e5e02f 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_railway_and_automotive_engineering +task: kmmlu_direct_hard_railway_and_automotive_engineering +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml index 9c3df599..8f23fae5 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_real_estate +task: kmmlu_direct_hard_real_estate +tag: kmmlu_direct_hard_other_tasks diff --git 
a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml index f62e8e95..192a1f2c 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_refrigerating_machinery +task: kmmlu_direct_hard_refrigerating_machinery +tag: kmmlu_direct_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml index ad4dc2cf..c24babc3 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_social_welfare +task: kmmlu_direct_hard_social_welfare +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml index 445ab693..17586af6 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml @@ -1,3 +1,4 @@ dataset_name: taxation include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_taxation +task: kmmlu_direct_hard_taxation +tag: kmmlu_direct_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml index 498b2fb2..bed0df91 100644 --- a/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml +++ 
b/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _direct_hard_kmmlu_yaml -task: kmmlu_hard_direct_telecommunications_and_wireless_technology +task: kmmlu_direct_hard_telecommunications_and_wireless_technology +tag: kmmlu_direct_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml index 26c4105b..b3e69705 100644 --- a/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml +++ b/lm_eval/tasks/kmmlu/hard/_hard_kmmlu_yaml @@ -1,6 +1,3 @@ -tag: - - kmmlu - - kmmlu_hard dataset_path: HAERAE-HUB/KMMLU-HARD output_type: multiple_choice test_split: test @@ -12,8 +9,5 @@ metric_list: - metric: acc aggregation: mean higher_is_better: true - - metric: acc_norm - aggregation: mean - higher_is_better: true metadata: version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml new file mode 100644 index 00000000..827e74ec --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard.yaml @@ -0,0 +1,11 @@ +group: kmmlu_hard +task: + - kmmlu_hard_stem + - kmmlu_hard_other + - kmmlu_hard_applied_science + - kmmlu_hard_humss +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml new file mode 100644 index 00000000..76d383af --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_applied_science.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_applied_science +task: + - kmmlu_hard_applied_science_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml new file mode 100644 index 00000000..39eb5a7a --- /dev/null +++ 
b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_humss.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_humss +task: + - kmmlu_hard_humss_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml new file mode 100644 index 00000000..5759fe88 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_other.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_other +task: + - kmmlu_hard_other_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml new file mode 100644 index 00000000..ee14c726 --- /dev/null +++ b/lm_eval/tasks/kmmlu/hard/_kmmlu_hard_stem.yaml @@ -0,0 +1,8 @@ +group: kmmlu_hard_stem +task: + - kmmlu_hard_stem_tasks +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 2.0 diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml index 8112903b..0c341baa 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml @@ -1,3 +1,4 @@ dataset_name: accounting include: _hard_kmmlu_yaml task: kmmlu_hard_accounting +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml index 3a20948b..90d284c8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml @@ -1,3 +1,4 @@ dataset_name: agricultural_sciences include: _hard_kmmlu_yaml task: kmmlu_hard_agricultural_sciences +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml index 87b3845f..5ec90f36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_aviation_engineering_and_maintenance.yaml @@ -1,3 +1,4 @@ dataset_name: aviation_engineering_and_maintenance include: _hard_kmmlu_yaml task: kmmlu_hard_aviation_engineering_and_maintenance +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml index 0a28b7c7..045e17e7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml @@ -1,3 +1,4 @@ dataset_name: biology include: _hard_kmmlu_yaml task: kmmlu_hard_biology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml index 8fc448a8..cbfa42eb 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: chemical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_chemical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml index 366c9502..67c65d65 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml @@ -1,3 +1,4 @@ dataset_name: chemistry include: _hard_kmmlu_yaml task: kmmlu_hard_chemistry +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml index ba1a15ad..58e3c87a 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: 
civil_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_civil_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml index 4e1f1213..42f91467 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml @@ -1,3 +1,4 @@ dataset_name: computer_science include: _hard_kmmlu_yaml task: kmmlu_hard_computer_science +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml index 8331379c..55a5a1d0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml @@ -1,3 +1,4 @@ dataset_name: construction include: _hard_kmmlu_yaml task: kmmlu_hard_construction +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml index b7acd49a..14e4d5ad 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml @@ -1,3 +1,4 @@ dataset_name: criminal_law include: _hard_kmmlu_yaml task: kmmlu_hard_criminal_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml index 6542c1ee..c737b1ab 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml @@ -1,3 +1,4 @@ dataset_name: ecology include: _hard_kmmlu_yaml task: kmmlu_hard_ecology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml index 4f1bfba0..9a0084dc 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml @@ -1,3 +1,4 @@ 
dataset_name: economics include: _hard_kmmlu_yaml task: kmmlu_hard_economics +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml index 0f6a6a80..568d094d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml @@ -1,3 +1,4 @@ dataset_name: education include: _hard_kmmlu_yaml task: kmmlu_hard_education +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml index 51625c1e..ad46c486 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electrical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electrical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml index 252ecc19..843c92a0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: electronics_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_electronics_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml index 062204f1..dcfe7f36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml @@ -1,3 +1,4 @@ dataset_name: energy_management include: _hard_kmmlu_yaml task: kmmlu_hard_energy_management +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml index d7f32dc5..a0ae1b81 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml @@ -1,3 +1,4 @@ dataset_name: environmental_science include: _hard_kmmlu_yaml task: kmmlu_hard_environmental_science +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml index 9448efcf..3ba973ba 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml @@ -1,3 +1,4 @@ dataset_name: fashion include: _hard_kmmlu_yaml task: kmmlu_hard_fashion +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml index 138920ef..cd08fe3b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml @@ -1,3 +1,4 @@ dataset_name: food_processing include: _hard_kmmlu_yaml task: kmmlu_hard_food_processing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml index 14e213b5..fe30680a 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: gas_technology_and_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_gas_technology_and_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml index 0370a7a7..53b52e96 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml @@ -1,3 +1,4 @@ dataset_name: 
geomatics include: _hard_kmmlu_yaml task: kmmlu_hard_geomatics +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml index c5e2ba98..dcd2b179 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml @@ -1,3 +1,4 @@ dataset_name: health include: _hard_kmmlu_yaml task: kmmlu_hard_health +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml index d3cbef78..2e8449ff 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml @@ -1,3 +1,4 @@ dataset_name: industrial_engineer include: _hard_kmmlu_yaml task: kmmlu_hard_industrial_engineer +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml index 4af23d30..86ded35d 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml @@ -1,3 +1,4 @@ dataset_name: information_technology include: _hard_kmmlu_yaml task: kmmlu_hard_information_technology +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml index 76bfe50c..55de2641 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml @@ -1,3 +1,4 @@ dataset_name: interior_architecture_and_design include: _hard_kmmlu_yaml task: kmmlu_hard_interior_architecture_and_design +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml index 60ff94e7..4d4152b7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml @@ -1,3 +1,4 @@ dataset_name: korean_history include: _hard_kmmlu_yaml task: kmmlu_hard_korean_history +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml index aeec24dc..0a75d904 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml @@ -1,3 +1,4 @@ dataset_name: law include: _hard_kmmlu_yaml task: kmmlu_hard_law +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml index 222f89ba..210ffd8f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml @@ -1,3 +1,4 @@ dataset_name: machine_design_and_manufacturing include: _hard_kmmlu_yaml task: kmmlu_hard_machine_design_and_manufacturing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml index 8e9e8664..d3f27519 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml @@ -1,3 +1,4 @@ dataset_name: management include: _hard_kmmlu_yaml task: kmmlu_hard_management +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml index e68041d5..dec43bc8 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: maritime_engineering include: 
_hard_kmmlu_yaml task: kmmlu_hard_maritime_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml index 54a62d62..f86cfe17 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml @@ -1,3 +1,4 @@ dataset_name: marketing include: _hard_kmmlu_yaml task: kmmlu_hard_marketing +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml index 4582b0f3..684120a0 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: materials_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_materials_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml index e5637176..ed125f90 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml @@ -1,3 +1,4 @@ dataset_name: math include: _hard_kmmlu_yaml task: kmmlu_hard_math +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml index 9b3adca0..b6d00e2e 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: mechanical_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_mechanical_engineering +tag: kmmlu_hard_stem_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml index 21c25fc8..acf3ed9f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml +++ 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml @@ -1,3 +1,4 @@ dataset_name: nondestructive_testing include: _hard_kmmlu_yaml task: kmmlu_hard_nondestructive_testing +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml index 3fcdcd96..910f11c5 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml @@ -1,3 +1,4 @@ dataset_name: patent include: _hard_kmmlu_yaml task: kmmlu_hard_patent +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml index 6bb907cb..7b7addfd 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml @@ -1,3 +1,4 @@ dataset_name: political_science_and_sociology include: _hard_kmmlu_yaml task: kmmlu_hard_political_science_and_sociology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml index c79cef1f..a6d8b754 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml @@ -1,3 +1,4 @@ dataset_name: psychology include: _hard_kmmlu_yaml task: kmmlu_hard_psychology +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml index 110bd147..8b04b78e 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml @@ -1,3 +1,4 @@ dataset_name: public_safety include: _hard_kmmlu_yaml task: kmmlu_hard_public_safety +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml 
b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml index 31b610f7..358b7e36 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml @@ -1,3 +1,4 @@ dataset_name: railway_and_automotive_engineering include: _hard_kmmlu_yaml task: kmmlu_hard_railway_and_automotive_engineering +tag: kmmlu_hard_applied_science_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml index bd1b32c8..9010e2a7 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml @@ -1,3 +1,4 @@ dataset_name: real_estate include: _hard_kmmlu_yaml task: kmmlu_hard_real_estate +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml index 8c7dd139..5f03b70b 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml @@ -1,3 +1,4 @@ dataset_name: refrigerating_machinery include: _hard_kmmlu_yaml task: kmmlu_hard_refrigerating_machinery +tag: kmmlu_hard_other_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml index 12502a57..24f105e4 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml @@ -1,3 +1,4 @@ dataset_name: social_welfare include: _hard_kmmlu_yaml task: kmmlu_hard_social_welfare +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml index f0f815ab..7d0bbf86 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml @@ 
-1,3 +1,4 @@ dataset_name: taxation include: _hard_kmmlu_yaml task: kmmlu_hard_taxation +tag: kmmlu_hard_humss_tasks diff --git a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml index 0cb519d1..c1398c5f 100644 --- a/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml +++ b/lm_eval/tasks/kmmlu/hard/kmmlu_hard_telecommunications_and_wireless_technology.yaml @@ -1,3 +1,4 @@ dataset_name: telecommunications_and_wireless_technology include: _hard_kmmlu_yaml task: kmmlu_hard_telecommunications_and_wireless_technology +tag: kmmlu_hard_applied_science_tasks -- GitLab From 370e2f9e5bbe59912644b1b6e052e17be31d6858 Mon Sep 17 00:00:00 2001 From: Jan Kaniecki Date: Tue, 21 Jan 2025 17:55:23 +0100 Subject: [PATCH 09/32] Fix max_tokens handling in vllm_vlms.py (#2637) * Update vllm_vlms.py * pre-commit --------- Co-authored-by: Baber --- lm_eval/models/vllm_vlms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lm_eval/models/vllm_vlms.py b/lm_eval/models/vllm_vlms.py index ab216ab5..a0d72926 100644 --- a/lm_eval/models/vllm_vlms.py +++ b/lm_eval/models/vllm_vlms.py @@ -271,7 +271,9 @@ class VLLM_VLM(VLLM): left_truncate_len=max_ctx_len, ) - cont = self._model_generate(inputs, stop=until, generate=True, **kwargs) + cont = self._model_generate( + inputs, stop=until, generate=True, max_tokens=max_gen_toks, **kwargs + ) for output, context in zip(cont, contexts): generated_text = output.outputs[0].text -- GitLab From 5c006ed417a2f4d01248d487bcbd493ebe3e5edd Mon Sep 17 00:00:00 2001 From: Minho Ryu Date: Sat, 25 Jan 2025 01:00:29 +0900 Subject: [PATCH 10/32] separate category for `global_mmlu` (#2652) * separate category * set version 0.0 * apply precommit --- .../global_mmlu/default/_generate_configs.py | 42 ------------------- .../{_default_yaml => ar/_ar_template_yaml} | 3 +- 
.../default/ar/_global_mmlu_ar.yaml | 13 ++++++ .../default/ar/global_mmlu_ar_business.yaml | 4 ++ .../default/ar/global_mmlu_ar_humanities.yaml | 4 ++ .../default/ar/global_mmlu_ar_medical.yaml | 4 ++ .../default/ar/global_mmlu_ar_other.yaml | 4 ++ .../ar/global_mmlu_ar_social_sciences.yaml | 4 ++ .../default/ar/global_mmlu_ar_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ar/utils.py | 18 ++++++++ .../global_mmlu/default/bn/_bn_template_yaml | 16 +++++++ .../default/bn/_global_mmlu_bn.yaml | 13 ++++++ .../default/bn/global_mmlu_bn_business.yaml | 4 ++ .../default/bn/global_mmlu_bn_humanities.yaml | 4 ++ .../default/bn/global_mmlu_bn_medical.yaml | 4 ++ .../default/bn/global_mmlu_bn_other.yaml | 4 ++ .../bn/global_mmlu_bn_social_sciences.yaml | 4 ++ .../default/bn/global_mmlu_bn_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/bn/utils.py | 18 ++++++++ .../global_mmlu/default/de/_de_template_yaml | 16 +++++++ .../default/de/_global_mmlu_de.yaml | 13 ++++++ .../default/de/global_mmlu_de_business.yaml | 4 ++ .../default/de/global_mmlu_de_humanities.yaml | 4 ++ .../default/de/global_mmlu_de_medical.yaml | 4 ++ .../default/de/global_mmlu_de_other.yaml | 4 ++ .../de/global_mmlu_de_social_sciences.yaml | 4 ++ .../default/de/global_mmlu_de_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/de/utils.py | 18 ++++++++ .../global_mmlu/default/en/_en_template_yaml | 16 +++++++ .../default/en/_global_mmlu_en.yaml | 13 ++++++ .../default/en/global_mmlu_en_business.yaml | 4 ++ .../default/en/global_mmlu_en_humanities.yaml | 4 ++ .../default/en/global_mmlu_en_medical.yaml | 4 ++ .../default/en/global_mmlu_en_other.yaml | 4 ++ .../en/global_mmlu_en_social_sciences.yaml | 4 ++ .../default/en/global_mmlu_en_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/en/utils.py | 18 ++++++++ .../global_mmlu/default/es/_es_template_yaml | 16 +++++++ .../default/es/_global_mmlu_es.yaml | 13 ++++++ .../default/es/global_mmlu_es_business.yaml | 4 ++ 
.../default/es/global_mmlu_es_humanities.yaml | 4 ++ .../default/es/global_mmlu_es_medical.yaml | 4 ++ .../default/es/global_mmlu_es_other.yaml | 4 ++ .../es/global_mmlu_es_social_sciences.yaml | 4 ++ .../default/es/global_mmlu_es_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/es/utils.py | 18 ++++++++ .../global_mmlu/default/fr/_fr_template_yaml | 16 +++++++ .../default/fr/_global_mmlu_fr.yaml | 13 ++++++ .../default/fr/global_mmlu_fr_business.yaml | 4 ++ .../default/fr/global_mmlu_fr_humanities.yaml | 4 ++ .../default/fr/global_mmlu_fr_medical.yaml | 4 ++ .../default/fr/global_mmlu_fr_other.yaml | 4 ++ .../fr/global_mmlu_fr_social_sciences.yaml | 4 ++ .../default/fr/global_mmlu_fr_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/fr/utils.py | 18 ++++++++ .../global_mmlu/default/global_mmlu_ar.yaml | 4 -- .../global_mmlu/default/global_mmlu_bn.yaml | 4 -- .../global_mmlu/default/global_mmlu_de.yaml | 4 -- .../global_mmlu/default/global_mmlu_en.yaml | 4 -- .../global_mmlu/default/global_mmlu_es.yaml | 4 -- .../global_mmlu/default/global_mmlu_fr.yaml | 4 -- .../global_mmlu/default/global_mmlu_hi.yaml | 4 -- .../global_mmlu/default/global_mmlu_id.yaml | 4 -- .../global_mmlu/default/global_mmlu_it.yaml | 4 -- .../global_mmlu/default/global_mmlu_ja.yaml | 4 -- .../global_mmlu/default/global_mmlu_ko.yaml | 4 -- .../global_mmlu/default/global_mmlu_pt.yaml | 4 -- .../global_mmlu/default/global_mmlu_sw.yaml | 4 -- .../global_mmlu/default/global_mmlu_yo.yaml | 4 -- .../global_mmlu/default/global_mmlu_zh.yaml | 4 -- .../default/hi/_global_mmlu_hi.yaml | 13 ++++++ .../global_mmlu/default/hi/_hi_template_yaml | 16 +++++++ .../default/hi/global_mmlu_hi_business.yaml | 4 ++ .../default/hi/global_mmlu_hi_humanities.yaml | 4 ++ .../default/hi/global_mmlu_hi_medical.yaml | 4 ++ .../default/hi/global_mmlu_hi_other.yaml | 4 ++ .../hi/global_mmlu_hi_social_sciences.yaml | 4 ++ .../default/hi/global_mmlu_hi_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/hi/utils.py | 18 
++++++++ .../default/id/_global_mmlu_id.yaml | 13 ++++++ .../global_mmlu/default/id/_id_template_yaml | 16 +++++++ .../default/id/global_mmlu_id_business.yaml | 4 ++ .../default/id/global_mmlu_id_humanities.yaml | 4 ++ .../default/id/global_mmlu_id_medical.yaml | 4 ++ .../default/id/global_mmlu_id_other.yaml | 4 ++ .../id/global_mmlu_id_social_sciences.yaml | 4 ++ .../default/id/global_mmlu_id_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/id/utils.py | 18 ++++++++ .../default/it/_global_mmlu_it.yaml | 13 ++++++ .../global_mmlu/default/it/_it_template_yaml | 16 +++++++ .../default/it/global_mmlu_it_business.yaml | 4 ++ .../default/it/global_mmlu_it_humanities.yaml | 4 ++ .../default/it/global_mmlu_it_medical.yaml | 4 ++ .../default/it/global_mmlu_it_other.yaml | 4 ++ .../it/global_mmlu_it_social_sciences.yaml | 4 ++ .../default/it/global_mmlu_it_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/it/utils.py | 18 ++++++++ .../default/ja/_global_mmlu_ja.yaml | 13 ++++++ .../global_mmlu/default/ja/_ja_template_yaml | 16 +++++++ .../default/ja/global_mmlu_ja_business.yaml | 4 ++ .../default/ja/global_mmlu_ja_humanities.yaml | 4 ++ .../default/ja/global_mmlu_ja_medical.yaml | 4 ++ .../default/ja/global_mmlu_ja_other.yaml | 4 ++ .../ja/global_mmlu_ja_social_sciences.yaml | 4 ++ .../default/ja/global_mmlu_ja_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ja/utils.py | 18 ++++++++ .../default/ko/_global_mmlu_ko.yaml | 13 ++++++ .../global_mmlu/default/ko/_ko_template_yaml | 16 +++++++ .../default/ko/global_mmlu_ko_business.yaml | 4 ++ .../default/ko/global_mmlu_ko_humanities.yaml | 4 ++ .../default/ko/global_mmlu_ko_medical.yaml | 4 ++ .../default/ko/global_mmlu_ko_other.yaml | 4 ++ .../ko/global_mmlu_ko_social_sciences.yaml | 4 ++ .../default/ko/global_mmlu_ko_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/ko/utils.py | 18 ++++++++ .../default/pt/_global_mmlu_pt.yaml | 13 ++++++ .../global_mmlu/default/pt/_pt_template_yaml | 16 +++++++ 
.../default/pt/global_mmlu_pt_business.yaml | 4 ++ .../default/pt/global_mmlu_pt_humanities.yaml | 4 ++ .../default/pt/global_mmlu_pt_medical.yaml | 4 ++ .../default/pt/global_mmlu_pt_other.yaml | 4 ++ .../pt/global_mmlu_pt_social_sciences.yaml | 4 ++ .../default/pt/global_mmlu_pt_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/pt/utils.py | 18 ++++++++ .../default/sw/_global_mmlu_sw.yaml | 13 ++++++ .../global_mmlu/default/sw/_sw_template_yaml | 16 +++++++ .../default/sw/global_mmlu_sw_business.yaml | 4 ++ .../default/sw/global_mmlu_sw_humanities.yaml | 4 ++ .../default/sw/global_mmlu_sw_medical.yaml | 4 ++ .../default/sw/global_mmlu_sw_other.yaml | 4 ++ .../sw/global_mmlu_sw_social_sciences.yaml | 4 ++ .../default/sw/global_mmlu_sw_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/sw/utils.py | 18 ++++++++ .../default/yo/_global_mmlu_yo.yaml | 13 ++++++ .../global_mmlu/default/yo/_yo_template_yaml | 16 +++++++ .../default/yo/global_mmlu_yo_business.yaml | 4 ++ .../default/yo/global_mmlu_yo_humanities.yaml | 4 ++ .../default/yo/global_mmlu_yo_medical.yaml | 4 ++ .../default/yo/global_mmlu_yo_other.yaml | 4 ++ .../yo/global_mmlu_yo_social_sciences.yaml | 4 ++ .../default/yo/global_mmlu_yo_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/yo/utils.py | 18 ++++++++ .../default/zh/_global_mmlu_zh.yaml | 13 ++++++ .../global_mmlu/default/zh/_zh_template_yaml | 16 +++++++ .../default/zh/global_mmlu_zh_business.yaml | 4 ++ .../default/zh/global_mmlu_zh_humanities.yaml | 4 ++ .../default/zh/global_mmlu_zh_medical.yaml | 4 ++ .../default/zh/global_mmlu_zh_other.yaml | 4 ++ .../zh/global_mmlu_zh_social_sciences.yaml | 4 ++ .../default/zh/global_mmlu_zh_stem.yaml | 4 ++ lm_eval/tasks/global_mmlu/default/zh/utils.py | 18 ++++++++ .../full/am/_global_mmlu_full_am.yaml | 2 +- .../full/ar/_global_mmlu_full_ar.yaml | 2 +- .../full/bn/_global_mmlu_full_bn.yaml | 2 +- .../full/cs/_global_mmlu_full_cs.yaml | 2 +- .../full/de/_global_mmlu_full_de.yaml | 2 +- 
.../full/el/_global_mmlu_full_el.yaml | 2 +- .../full/en/_global_mmlu_full_en.yaml | 2 +- .../full/es/_global_mmlu_full_es.yaml | 2 +- .../full/fa/_global_mmlu_full_fa.yaml | 2 +- .../full/fil/_global_mmlu_full_fil.yaml | 2 +- .../full/fr/_global_mmlu_full_fr.yaml | 2 +- .../full/ha/_global_mmlu_full_ha.yaml | 2 +- .../full/he/_global_mmlu_full_he.yaml | 2 +- .../full/hi/_global_mmlu_full_hi.yaml | 2 +- .../full/id/_global_mmlu_full_id.yaml | 2 +- .../full/ig/_global_mmlu_full_ig.yaml | 2 +- .../full/it/_global_mmlu_full_it.yaml | 2 +- .../full/ja/_global_mmlu_full_ja.yaml | 2 +- .../full/ko/_global_mmlu_full_ko.yaml | 2 +- .../full/ky/_global_mmlu_full_ky.yaml | 2 +- .../full/lt/_global_mmlu_full_lt.yaml | 2 +- .../full/mg/_global_mmlu_full_mg.yaml | 2 +- .../full/ms/_global_mmlu_full_ms.yaml | 2 +- .../full/ne/_global_mmlu_full_ne.yaml | 2 +- .../full/nl/_global_mmlu_full_nl.yaml | 2 +- .../full/ny/_global_mmlu_full_ny.yaml | 2 +- .../full/pl/_global_mmlu_full_pl.yaml | 2 +- .../full/pt/_global_mmlu_full_pt.yaml | 2 +- .../full/ro/_global_mmlu_full_ro.yaml | 2 +- .../full/ru/_global_mmlu_full_ru.yaml | 2 +- .../full/si/_global_mmlu_full_si.yaml | 2 +- .../full/sn/_global_mmlu_full_sn.yaml | 2 +- .../full/so/_global_mmlu_full_so.yaml | 2 +- .../full/sr/_global_mmlu_full_sr.yaml | 2 +- .../full/sv/_global_mmlu_full_sv.yaml | 2 +- .../full/sw/_global_mmlu_full_sw.yaml | 2 +- .../full/te/_global_mmlu_full_te.yaml | 2 +- .../full/tr/_global_mmlu_full_tr.yaml | 2 +- .../full/uk/_global_mmlu_full_uk.yaml | 2 +- .../full/vi/_global_mmlu_full_vi.yaml | 2 +- .../full/yo/_global_mmlu_full_yo.yaml | 2 +- .../full/zh/_global_mmlu_full_zh.yaml | 2 +- 193 files changed, 1092 insertions(+), 146 deletions(-) delete mode 100644 lm_eval/tasks/global_mmlu/default/_generate_configs.py rename lm_eval/tasks/global_mmlu/default/{_default_yaml => ar/_ar_template_yaml} (95%) create mode 100644 lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ar/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/bn/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/de/_de_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/de/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/en/_en_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/en/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/es/_es_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/es/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml create 
mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/fr/utils.py delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml delete mode 100644 lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml create mode 
100644 lm_eval/tasks/global_mmlu/default/hi/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/_id_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/id/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/_it_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/it/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml 
create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ja/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/ko/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/pt/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml create mode 100644 
lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/sw/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/yo/utils.py create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml create mode 100644 lm_eval/tasks/global_mmlu/default/zh/utils.py diff --git a/lm_eval/tasks/global_mmlu/default/_generate_configs.py 
b/lm_eval/tasks/global_mmlu/default/_generate_configs.py deleted file mode 100644 index 58e169c6..00000000 --- a/lm_eval/tasks/global_mmlu/default/_generate_configs.py +++ /dev/null @@ -1,42 +0,0 @@ -import yaml - - -languages = [ - "en", - "ar", - "fr", - "es", - "hi", - "de", - "id", - "it", - "ja", - "ko", - "pt", - "zh", - "yo", - "bn", - "sw", -] - - -def main() -> None: - for language in languages: - file_name = f"global_mmlu_{language}.yaml" - try: - with open(f"{file_name}", "w") as f: - f.write("# Generated by _generate_configs.py\n") - yaml.dump( - { - "include": "_default_yaml", - "task": f"global_mmlu_{language}", - "dataset_name": language, - }, - f, - ) - except FileExistsError: - pass - - -if __name__ == "__main__": - main() diff --git a/lm_eval/tasks/global_mmlu/default/_default_yaml b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml similarity index 95% rename from lm_eval/tasks/global_mmlu/default/_default_yaml rename to lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml index 33a1fc35..3fa8f23f 100644 --- a/lm_eval/tasks/global_mmlu/default/_default_yaml +++ b/lm_eval/tasks/global_mmlu/default/ar/_ar_template_yaml @@ -1,6 +1,5 @@ -tag: - - global_mmlu dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ar test_split: test fewshot_split: dev fewshot_config: diff --git a/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml new file mode 100644 index 00000000..27f6e1a4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/_global_mmlu_ar.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ar +task: + - global_mmlu_ar_business + - global_mmlu_ar_humanities + - global_mmlu_ar_medical + - global_mmlu_ar_other + - global_mmlu_ar_stem + - global_mmlu_ar_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml 
b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml new file mode 100644 index 00000000..c7f47fdf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ar_business diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml new file mode 100644 index 00000000..c35f1f6e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ar_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml new file mode 100644 index 00000000..cb405486 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ar_medical diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml new file mode 100644 index 00000000..1ffd9be8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ar_other diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml new file mode 100644 index 00000000..037e25a8 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by 
_generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ar_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml new file mode 100644 index 00000000..f2ed28c7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/global_mmlu_ar_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ar_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ar_stem diff --git a/lm_eval/tasks/global_mmlu/default/ar/utils.py b/lm_eval/tasks/global_mmlu/default/ar/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ar/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml new file mode 100644 index 00000000..c9a234db --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_bn_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: bn +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml new file mode 100644 index 00000000..4098af1a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/_global_mmlu_bn.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_bn +task: + - global_mmlu_bn_business + - global_mmlu_bn_humanities + - global_mmlu_bn_medical + - global_mmlu_bn_other + - global_mmlu_bn_stem + - global_mmlu_bn_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml new file mode 100644 index 00000000..c77589c3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_bn_business diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml new file mode 100644 index 00000000..da495c6d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_bn_humanities diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml new file mode 100644 index 00000000..867e5e4e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_bn_medical diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml new file mode 100644 index 00000000..c44b6d75 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_bn_other diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml new file mode 100644 index 00000000..7bbde182 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_bn_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml new file mode 100644 index 00000000..433ba8b7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/global_mmlu_bn_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _bn_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_bn_stem diff --git a/lm_eval/tasks/global_mmlu/default/bn/utils.py b/lm_eval/tasks/global_mmlu/default/bn/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/bn/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml new file mode 100644 index 00000000..7c17e2d0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_de_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: de +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml new file mode 100644 index 00000000..1a54aace --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/_global_mmlu_de.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_de +task: + - global_mmlu_de_business + - global_mmlu_de_humanities + - global_mmlu_de_medical + - global_mmlu_de_other + - global_mmlu_de_stem + - global_mmlu_de_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml new file mode 100644 index 00000000..eba9514c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_de_business diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml new file mode 100644 index 00000000..d37de491 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_de_humanities diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml new file mode 100644 index 00000000..f114de46 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_de_medical diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml new file mode 100644 index 00000000..d6089b2d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_de_other diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml new file mode 100644 index 00000000..853711f3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_de_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml new file mode 100644 index 00000000..ef66d3cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/global_mmlu_de_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _de_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_de_stem diff --git 
a/lm_eval/tasks/global_mmlu/default/de/utils.py b/lm_eval/tasks/global_mmlu/default/de/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/de/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml new file mode 100644 index 00000000..e24d7983 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_en_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: en +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml new file mode 100644 index 00000000..fc927412 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/_global_mmlu_en.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_en +task: + - global_mmlu_en_business + - global_mmlu_en_humanities + - global_mmlu_en_medical + - global_mmlu_en_other + - global_mmlu_en_stem + - global_mmlu_en_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml new file mode 100644 index 00000000..aa3f4bc1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_en_business diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml new file mode 100644 index 00000000..c2a20e29 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_en_humanities diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml new file mode 100644 index 00000000..ba991459 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_en_medical diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml new file mode 100644 index 00000000..c14d7657 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_en_other diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml new file mode 100644 index 00000000..d576d2c1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_en_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml new file mode 100644 index 00000000..fd0179f2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/global_mmlu_en_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _en_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_en_stem diff --git a/lm_eval/tasks/global_mmlu/default/en/utils.py b/lm_eval/tasks/global_mmlu/default/en/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/en/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml new file mode 100644 index 00000000..b0942331 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_es_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: es +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml new file mode 100644 index 00000000..614b1b0f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/_global_mmlu_es.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_es +task: + - global_mmlu_es_business + - global_mmlu_es_humanities + - global_mmlu_es_medical + - global_mmlu_es_other + - global_mmlu_es_stem + - global_mmlu_es_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml new file mode 100644 index 00000000..388251a2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_es_business diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml new file mode 100644 index 00000000..fd51574b --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_es_humanities diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml new file mode 100644 index 00000000..649ad70d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_es_medical diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml new file mode 100644 index 00000000..878251d1 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_es_other diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml new file mode 100644 index 00000000..1e97c6ad --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_es_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml new file mode 100644 index 00000000..45b4fa4a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/global_mmlu_es_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _es_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_es_stem diff --git 
a/lm_eval/tasks/global_mmlu/default/es/utils.py b/lm_eval/tasks/global_mmlu/default/es/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/es/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml new file mode 100644 index 00000000..a2c6fc88 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_fr_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: fr +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml new file mode 100644 index 00000000..d65a2e25 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/_global_mmlu_fr.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_fr +task: + - global_mmlu_fr_business + - global_mmlu_fr_humanities + - global_mmlu_fr_medical + - global_mmlu_fr_other + - global_mmlu_fr_stem + - global_mmlu_fr_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml new file mode 100644 index 00000000..49f8543b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_fr_business diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml new file mode 100644 index 00000000..35d0086b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_fr_humanities diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml new file mode 100644 index 00000000..e411a347 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml 
+process_docs: !function utils.process_medical +task: global_mmlu_fr_medical diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml new file mode 100644 index 00000000..5bece303 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_fr_other diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml new file mode 100644 index 00000000..4e26ceab --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_fr_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml new file mode 100644 index 00000000..6d3d1538 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/global_mmlu_fr_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _fr_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_fr_stem diff --git a/lm_eval/tasks/global_mmlu/default/fr/utils.py b/lm_eval/tasks/global_mmlu/default/fr/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/fr/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for 
category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml deleted file mode 100644 index 703f420a..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ar.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ar -include: _default_yaml -task: global_mmlu_ar diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml deleted file mode 100644 index f85b67a2..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_bn.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: bn -include: _default_yaml -task: global_mmlu_bn diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml deleted file mode 100644 index a874c64f..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_de.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: de -include: _default_yaml -task: global_mmlu_de diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml deleted file mode 100644 index 34a6d712..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_en.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: en -include: _default_yaml -task: global_mmlu_en diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml deleted file mode 100644 index 75abc775..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_es.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: es -include: _default_yaml -task: global_mmlu_es diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml 
b/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml deleted file mode 100644 index 1a66f536..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_fr.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: fr -include: _default_yaml -task: global_mmlu_fr diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml deleted file mode 100644 index 788f95f2..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_hi.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: hi -include: _default_yaml -task: global_mmlu_hi diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml deleted file mode 100644 index f4b6d507..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_id.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: id -include: _default_yaml -task: global_mmlu_id diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml deleted file mode 100644 index 5b55df97..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_it.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: it -include: _default_yaml -task: global_mmlu_it diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml deleted file mode 100644 index 97d9c6ca..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ja.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ja -include: _default_yaml -task: global_mmlu_ja diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml deleted file mode 100644 index 02b7fe03..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_ko.yaml +++ /dev/null 
@@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: ko -include: _default_yaml -task: global_mmlu_ko diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml deleted file mode 100644 index 724bfb4d..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_pt.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: pt -include: _default_yaml -task: global_mmlu_pt diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml deleted file mode 100644 index 481232fa..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_sw.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: sw -include: _default_yaml -task: global_mmlu_sw diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml deleted file mode 100644 index c6ec2f9e..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_yo.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: yo -include: _default_yaml -task: global_mmlu_yo diff --git a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml deleted file mode 100644 index 862d46ad..00000000 --- a/lm_eval/tasks/global_mmlu/default/global_mmlu_zh.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Generated by _generate_configs.py -dataset_name: zh -include: _default_yaml -task: global_mmlu_zh diff --git a/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml new file mode 100644 index 00000000..406b27a6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_global_mmlu_hi.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_hi +task: + - global_mmlu_hi_business + - global_mmlu_hi_humanities + - global_mmlu_hi_medical + - global_mmlu_hi_other + - 
global_mmlu_hi_stem + - global_mmlu_hi_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml new file mode 100644 index 00000000..180dee96 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/_hi_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: hi +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml new file mode 100644 index 00000000..63b516c5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_hi_business diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml new file mode 100644 index 00000000..d8e888cd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_hi_humanities diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml new file mode 100644 index 00000000..46a21957 --- /dev/null +++ 
b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_hi_medical diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml new file mode 100644 index 00000000..ea242d7a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_hi_other diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml new file mode 100644 index 00000000..df95b8c4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_hi_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml new file mode 100644 index 00000000..acab4f12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/global_mmlu_hi_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _hi_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_hi_stem diff --git a/lm_eval/tasks/global_mmlu/default/hi/utils.py b/lm_eval/tasks/global_mmlu/default/hi/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/hi/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] 
== category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml new file mode 100644 index 00000000..cfe87f59 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_global_mmlu_id.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_id +task: + - global_mmlu_id_business + - global_mmlu_id_humanities + - global_mmlu_id_medical + - global_mmlu_id_other + - global_mmlu_id_stem + - global_mmlu_id_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml new file mode 100644 index 00000000..fae80c32 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/_id_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: id +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml new file mode 100644 index 00000000..d8f7c1cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_id_business diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml new file mode 100644 index 00000000..459442d4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_id_humanities diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml new file mode 100644 index 00000000..1fe61f13 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_id_medical diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml new file mode 100644 index 00000000..dfdf7dd2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_id_other diff --git 
a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml new file mode 100644 index 00000000..8ac1ddf4 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_id_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml new file mode 100644 index 00000000..a2230d33 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/global_mmlu_id_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _id_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_id_stem diff --git a/lm_eval/tasks/global_mmlu/default/id/utils.py b/lm_eval/tasks/global_mmlu/default/id/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/id/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml new file mode 100644 index 00000000..1378b765 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_global_mmlu_it.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_it +task: + - global_mmlu_it_business + - global_mmlu_it_humanities + - global_mmlu_it_medical + - global_mmlu_it_other + - global_mmlu_it_stem + - 
global_mmlu_it_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml new file mode 100644 index 00000000..e6b1f56d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/_it_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: it +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml new file mode 100644 index 00000000..dabac0a9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_it_business diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml new file mode 100644 index 00000000..6d2c923f --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_it_humanities diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml new file mode 100644 index 00000000..25d4efc5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_it_medical diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml new file mode 100644 index 00000000..3e35260d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_it_other diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml new file mode 100644 index 00000000..bee79835 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_it_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml new file mode 100644 index 00000000..04502cef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/global_mmlu_it_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _it_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_it_stem diff --git a/lm_eval/tasks/global_mmlu/default/it/utils.py b/lm_eval/tasks/global_mmlu/default/it/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/it/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml new file mode 100644 index 00000000..098f3b57 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_global_mmlu_ja.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ja +task: + - global_mmlu_ja_business + - global_mmlu_ja_humanities + - global_mmlu_ja_medical + - global_mmlu_ja_other + - global_mmlu_ja_stem + - global_mmlu_ja_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml new file mode 100644 index 00000000..5f0e4cc6 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/_ja_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ja +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml new file mode 100644 index 00000000..19a5050a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ja_business diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml new file mode 100644 index 00000000..b2d83886 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ja_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml new file mode 100644 index 00000000..8c0695ef --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ja_medical diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml new file mode 100644 index 00000000..5e72d4c0 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ja_other diff --git 
a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml new file mode 100644 index 00000000..acdabd53 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ja_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml new file mode 100644 index 00000000..b9ab07cb --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/global_mmlu_ja_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ja_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ja_stem diff --git a/lm_eval/tasks/global_mmlu/default/ja/utils.py b/lm_eval/tasks/global_mmlu/default/ja/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ja/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml new file mode 100644 index 00000000..19f4f961 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_global_mmlu_ko.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_ko +task: + - global_mmlu_ko_business + - global_mmlu_ko_humanities + - global_mmlu_ko_medical + - global_mmlu_ko_other + - global_mmlu_ko_stem + - 
global_mmlu_ko_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml new file mode 100644 index 00000000..364e159b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/_ko_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: ko +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml new file mode 100644 index 00000000..2f1ce375 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_ko_business diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml new file mode 100644 index 00000000..a613ff55 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_ko_humanities diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml new file mode 100644 index 00000000..7e871038 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_ko_medical diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml new file mode 100644 index 00000000..3fa1c608 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_ko_other diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml new file mode 100644 index 00000000..ad5874f9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_ko_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml new file mode 100644 index 00000000..f6c7e8ec --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/global_mmlu_ko_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _ko_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_ko_stem diff --git a/lm_eval/tasks/global_mmlu/default/ko/utils.py b/lm_eval/tasks/global_mmlu/default/ko/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/ko/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml new file mode 100644 index 00000000..7a489c12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_global_mmlu_pt.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_pt +task: + - global_mmlu_pt_business + - global_mmlu_pt_humanities + - global_mmlu_pt_medical + - global_mmlu_pt_other + - global_mmlu_pt_stem + - global_mmlu_pt_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml new file mode 100644 index 00000000..f1db6629 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/_pt_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: pt +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml new file mode 100644 index 00000000..1e72b168 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_pt_business diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml new file mode 100644 index 00000000..7244f2a7 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_pt_humanities diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml new file mode 100644 index 00000000..44776f2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_pt_medical diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml new file mode 100644 index 00000000..b6121201 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_pt_other diff --git 
a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml new file mode 100644 index 00000000..949d346e --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_pt_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml new file mode 100644 index 00000000..9f194c11 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/global_mmlu_pt_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _pt_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_pt_stem diff --git a/lm_eval/tasks/global_mmlu/default/pt/utils.py b/lm_eval/tasks/global_mmlu/default/pt/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/pt/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml new file mode 100644 index 00000000..b3913d24 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_global_mmlu_sw.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_sw +task: + - global_mmlu_sw_business + - global_mmlu_sw_humanities + - global_mmlu_sw_medical + - global_mmlu_sw_other + - global_mmlu_sw_stem + - 
global_mmlu_sw_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml new file mode 100644 index 00000000..33edff38 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/_sw_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: sw +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml new file mode 100644 index 00000000..a53ca478 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_sw_business diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml new file mode 100644 index 00000000..4687df76 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_sw_humanities diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml new file mode 100644 index 00000000..76240ea3 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_sw_medical diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml new file mode 100644 index 00000000..7c3bfda2 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_sw_other diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml new file mode 100644 index 00000000..4a77aa2b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_sw_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml new file mode 100644 index 00000000..d6faf18b --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/global_mmlu_sw_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _sw_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_sw_stem diff --git a/lm_eval/tasks/global_mmlu/default/sw/utils.py b/lm_eval/tasks/global_mmlu/default/sw/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/sw/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml new file mode 100644 index 00000000..14df221a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_global_mmlu_yo.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_yo +task: + - global_mmlu_yo_business + - global_mmlu_yo_humanities + - global_mmlu_yo_medical + - global_mmlu_yo_other + - global_mmlu_yo_stem + - global_mmlu_yo_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml new file mode 100644 index 00000000..6cdd0a03 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/_yo_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: yo +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. 
{{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml new file mode 100644 index 00000000..162a97cf --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_yo_business diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml new file mode 100644 index 00000000..5befbc12 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_yo_humanities diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml new file mode 100644 index 00000000..d48d0208 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_medical.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_yo_medical diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml new file mode 100644 index 00000000..5e407c2c --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_yo_other diff --git 
a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml new file mode 100644 index 00000000..c85596aa --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_yo_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml new file mode 100644 index 00000000..a19e1e8d --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/global_mmlu_yo_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _yo_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_yo_stem diff --git a/lm_eval/tasks/global_mmlu/default/yo/utils.py b/lm_eval/tasks/global_mmlu/default/yo/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/yo/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml new file mode 100644 index 00000000..212a33fc --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_global_mmlu_zh.yaml @@ -0,0 +1,13 @@ +group: global_mmlu_zh +task: + - global_mmlu_zh_business + - global_mmlu_zh_humanities + - global_mmlu_zh_medical + - global_mmlu_zh_other + - global_mmlu_zh_stem + - 
global_mmlu_zh_social_sciences +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml new file mode 100644 index 00000000..eeb1e7b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/_zh_template_yaml @@ -0,0 +1,16 @@ +dataset_path: CohereForAI/Global-MMLU-Lite +dataset_name: zh +test_split: test +fewshot_split: dev +fewshot_config: + sampler: default +output_type: multiple_choice +doc_to_text: "{{question.strip()}}\nA. {{option_a}}\nB. {{option_b}}\nC. {{option_c}}\nD. {{option_d}}\nAnswer:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_target: answer +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml new file mode 100644 index 00000000..aa0a689a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_business.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_business +task: global_mmlu_zh_business diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml new file mode 100644 index 00000000..823854b9 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_humanities.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_humanities +task: global_mmlu_zh_humanities diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml new file mode 100644 index 00000000..f1f7a7d5 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_medical.yaml @@ -0,0 
+1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_medical +task: global_mmlu_zh_medical diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml new file mode 100644 index 00000000..a3beae83 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_other.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_other +task: global_mmlu_zh_other diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml new file mode 100644 index 00000000..1891a45a --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_social_sciences.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_social_sciences +task: global_mmlu_zh_social_sciences diff --git a/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml new file mode 100644 index 00000000..6a9f4f05 --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/global_mmlu_zh_stem.yaml @@ -0,0 +1,4 @@ +# Generated by _generate_configs.py +include: _zh_template_yaml +process_docs: !function utils.process_stem +task: global_mmlu_zh_stem diff --git a/lm_eval/tasks/global_mmlu/default/zh/utils.py b/lm_eval/tasks/global_mmlu/default/zh/utils.py new file mode 100644 index 00000000..507a41bd --- /dev/null +++ b/lm_eval/tasks/global_mmlu/default/zh/utils.py @@ -0,0 +1,18 @@ +from functools import partial + + +CATEGORIES = ["Business", "Humanities", "Medical", "Other", "STEM", "Social Sciences"] + + +def process_docs(dataset, category): + return dataset.filter(lambda x: x["subject_category"] == category) + + +process_functions = { + 
f"process_{category.lower().replace(' ', '_')}": partial( + process_docs, category=category + ) + for category in CATEGORIES +} + +globals().update(process_functions) diff --git a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml index 48fc270a..555bfd86 100644 --- a/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml +++ b/lm_eval/tasks/global_mmlu/full/am/_global_mmlu_full_am.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml index 61f60b9b..83340da0 100644 --- a/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml +++ b/lm_eval/tasks/global_mmlu/full/ar/_global_mmlu_full_ar.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml index f1c91f09..135b4bf5 100644 --- a/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml +++ b/lm_eval/tasks/global_mmlu/full/bn/_global_mmlu_full_bn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml index 977b0051..419d5de4 100644 --- a/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml +++ b/lm_eval/tasks/global_mmlu/full/cs/_global_mmlu_full_cs.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml index c09da268..5217599a 100644 --- 
a/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml +++ b/lm_eval/tasks/global_mmlu/full/de/_global_mmlu_full_de.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml index a77feecb..9d44d8c8 100644 --- a/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml +++ b/lm_eval/tasks/global_mmlu/full/el/_global_mmlu_full_el.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml index 648a10dd..d4c82b64 100644 --- a/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml +++ b/lm_eval/tasks/global_mmlu/full/en/_global_mmlu_full_en.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml index 832001c1..13d2eccf 100644 --- a/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml +++ b/lm_eval/tasks/global_mmlu/full/es/_global_mmlu_full_es.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml index 9edb8540..282664e5 100644 --- a/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml +++ b/lm_eval/tasks/global_mmlu/full/fa/_global_mmlu_full_fa.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml 
b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml index 24fcb6d2..659c9d4b 100644 --- a/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml +++ b/lm_eval/tasks/global_mmlu/full/fil/_global_mmlu_full_fil.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml index e85d6746..7857a193 100644 --- a/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml +++ b/lm_eval/tasks/global_mmlu/full/fr/_global_mmlu_full_fr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml index 08a958bb..a5008417 100644 --- a/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml +++ b/lm_eval/tasks/global_mmlu/full/ha/_global_mmlu_full_ha.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml index ff0a5e8f..4952391a 100644 --- a/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml +++ b/lm_eval/tasks/global_mmlu/full/he/_global_mmlu_full_he.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml index ed54a6ad..c899be84 100644 --- a/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml +++ b/lm_eval/tasks/global_mmlu/full/hi/_global_mmlu_full_hi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 
diff --git a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml index f678660e..fd29a2ad 100644 --- a/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml +++ b/lm_eval/tasks/global_mmlu/full/id/_global_mmlu_full_id.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml index a263e295..d5346e1c 100644 --- a/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml +++ b/lm_eval/tasks/global_mmlu/full/ig/_global_mmlu_full_ig.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml index dabb8987..f3bcd694 100644 --- a/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml +++ b/lm_eval/tasks/global_mmlu/full/it/_global_mmlu_full_it.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml index 103460d7..af25573d 100644 --- a/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml +++ b/lm_eval/tasks/global_mmlu/full/ja/_global_mmlu_full_ja.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml index d2225e23..8b9b4197 100644 --- a/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml +++ b/lm_eval/tasks/global_mmlu/full/ko/_global_mmlu_full_ko.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: 
acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml index 4774599a..33b2a4df 100644 --- a/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml +++ b/lm_eval/tasks/global_mmlu/full/ky/_global_mmlu_full_ky.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml index 93929d42..dd2cd37d 100644 --- a/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml +++ b/lm_eval/tasks/global_mmlu/full/lt/_global_mmlu_full_lt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml index 05b55948..bcc6cd81 100644 --- a/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml +++ b/lm_eval/tasks/global_mmlu/full/mg/_global_mmlu_full_mg.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml index e5a13645..a322dad5 100644 --- a/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml +++ b/lm_eval/tasks/global_mmlu/full/ms/_global_mmlu_full_ms.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml index ec13a0be..7e9c77ce 100644 --- a/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml +++ 
b/lm_eval/tasks/global_mmlu/full/ne/_global_mmlu_full_ne.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml index 44f562da..c293a558 100644 --- a/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml +++ b/lm_eval/tasks/global_mmlu/full/nl/_global_mmlu_full_nl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml index c325bf1d..80739c2c 100644 --- a/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml +++ b/lm_eval/tasks/global_mmlu/full/ny/_global_mmlu_full_ny.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml index 2476fd33..deb51e11 100644 --- a/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml +++ b/lm_eval/tasks/global_mmlu/full/pl/_global_mmlu_full_pl.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml index ac79bda1..7cb9f0fe 100644 --- a/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml +++ b/lm_eval/tasks/global_mmlu/full/pt/_global_mmlu_full_pt.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml index b3aa5f49..f4e5575f 100644 --- 
a/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml +++ b/lm_eval/tasks/global_mmlu/full/ro/_global_mmlu_full_ro.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml index cc63cd34..3fc05379 100644 --- a/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml +++ b/lm_eval/tasks/global_mmlu/full/ru/_global_mmlu_full_ru.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml index 4deed570..6503e267 100644 --- a/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml +++ b/lm_eval/tasks/global_mmlu/full/si/_global_mmlu_full_si.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml index 98ced987..fd084b32 100644 --- a/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml +++ b/lm_eval/tasks/global_mmlu/full/sn/_global_mmlu_full_sn.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml index 014a4121..91e11b88 100644 --- a/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml +++ b/lm_eval/tasks/global_mmlu/full/so/_global_mmlu_full_so.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml 
b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml index e322d980..1e0c55a5 100644 --- a/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml +++ b/lm_eval/tasks/global_mmlu/full/sr/_global_mmlu_full_sr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml index a9b0dc1b..078de8b6 100644 --- a/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml +++ b/lm_eval/tasks/global_mmlu/full/sv/_global_mmlu_full_sv.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml index 274543cf..fd87d068 100644 --- a/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml +++ b/lm_eval/tasks/global_mmlu/full/sw/_global_mmlu_full_sw.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml index 5ef0f7ab..0bc967de 100644 --- a/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml +++ b/lm_eval/tasks/global_mmlu/full/te/_global_mmlu_full_te.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml index 8cd3d3f3..aa4ae63f 100644 --- a/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml +++ b/lm_eval/tasks/global_mmlu/full/tr/_global_mmlu_full_tr.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff 
--git a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml index e880be32..9bbd14cb 100644 --- a/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml +++ b/lm_eval/tasks/global_mmlu/full/uk/_global_mmlu_full_uk.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml index d6413b35..27888474 100644 --- a/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml +++ b/lm_eval/tasks/global_mmlu/full/vi/_global_mmlu_full_vi.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml index ba9f2460..ef4930b6 100644 --- a/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml +++ b/lm_eval/tasks/global_mmlu/full/yo/_global_mmlu_full_yo.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 diff --git a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml index 098ec097..926c8f88 100644 --- a/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml +++ b/lm_eval/tasks/global_mmlu/full/zh/_global_mmlu_full_zh.yaml @@ -8,4 +8,4 @@ aggregate_metric_list: - metric: acc weight_by_size: True metadata: - version: 1.0 + version: 0.0 -- GitLab From a0466f01b0c0df4c31e4ef357935c69f7b38476b Mon Sep 17 00:00:00 2001 From: Irina Proskurina <72871167+upunaprosk@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:02:44 +0100 Subject: [PATCH 11/32] Add Moral Stories (#2653) * Add moral stories task * Add moral stories task * Create README.md * Update README.md * Update line endings in 
moral_stories files --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/moral_stories/README.md | 71 +++++++++++++++++++ .../tasks/moral_stories/moral_stories.yaml | 20 ++++++ lm_eval/tasks/moral_stories/utils.py | 21 ++++++ 4 files changed, 113 insertions(+) create mode 100644 lm_eval/tasks/moral_stories/README.md create mode 100644 lm_eval/tasks/moral_stories/moral_stories.yaml create mode 100644 lm_eval/tasks/moral_stories/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index c92043bc..618f2c6e 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -86,6 +86,7 @@ | [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | | [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | | model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English | [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | | [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | | [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (31 languages) **Machine Translated.** | diff --git a/lm_eval/tasks/moral_stories/README.md b/lm_eval/tasks/moral_stories/README.md new file mode 100644 index 00000000..14d6fb2e --- /dev/null +++ b/lm_eval/tasks/moral_stories/README.md @@ -0,0 +1,71 @@ +# Moral Stories + +### Paper + +Title: `Moral Stories: Situated Reasoning about Norms, Intents, Actions, and their Consequences` + +Abstract: `https://aclanthology.org/2021.emnlp-main.54/` + +Moral Stories is a crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations, and their respective consequences. All stories in the dataset consist of seven sentences, belonging to the following categories: + +- Norm: A guideline for social conduct generally observed by most people in everyday situations. +- Situation: Setting of the story that introduces story participants and describes their environment. +- Intention: Reasonable goal that one of the story participants (the actor), wants to fulfill. +- Normative action: An action by the actor that fulfills the intention and observes the norm. +- Normative consequence: Possible effect of the normative action on the actor's environment. +- Divergent action: An action by the actor that fulfills the intention and diverges from the norm. +- Divergent consequence: Possible effect of the divergent action on the actor's environment. + + +Homepage: `https://github.com/demelin/moral_stories` + +The implementation is based on the paper "Histoires Morales: A French Dataset for Assessing Moral Alignment." The source code is available at: `https://github.com/upunaprosk/histoires-morales`. + +### Citation + +``` +@inproceedings{emelin-etal-2021-moral, + title = "Moral Stories: Situated Reasoning about Norms, Intents, Actions, and their Consequences", + author = "Emelin, Denis and + Le Bras, Ronan and + Hwang, Jena D. 
and + Forbes, Maxwell and + Choi, Yejin", + booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing", + month = nov, + year = "2021", + address = "Online and Punta Cana, Dominican Republic", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2021.emnlp-main.54", + doi = "10.18653/v1/2021.emnlp-main.54", + pages = "698--718", + abstract = "In social settings, much of human behavior is governed by unspoken rules of conduct rooted in societal norms. For artificial systems to be fully integrated into social environments, adherence to such norms is a central prerequisite. To investigate whether language generation models can serve as behavioral priors for systems deployed in social settings, we evaluate their ability to generate action descriptions that achieve predefined goals under normative constraints. Moreover, we examine if models can anticipate likely consequences of actions that either observe or violate known norms, or explain why certain actions are preferable by generating relevant norm hypotheses. For this purpose, we introduce Moral Stories, a crowd-sourced dataset of structured, branching narratives for the study of grounded, goal-oriented social reasoning. Finally, we propose decoding strategies that combine multiple expert models to significantly improve the quality of generated actions, consequences, and norms compared to strong baselines.", +} +``` + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tags + +* `moral_stories`: `Evaluation of the likelihoods of moral actions versus immoral actions. Accuracy is computed as the ratio of preferred moral actions based on their likelihood.` + +#### Tasks + +* `moral_stories.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? 
+ * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/moral_stories/moral_stories.yaml b/lm_eval/tasks/moral_stories/moral_stories.yaml new file mode 100644 index 00000000..0d8e3d99 --- /dev/null +++ b/lm_eval/tasks/moral_stories/moral_stories.yaml @@ -0,0 +1,20 @@ +tag: + - moral_stories +task: moral_stories +dataset_path: demelin/moral_stories +dataset_name: full +output_type: multiple_choice +test_split: train +process_docs: !function utils.process_docs +doc_to_text: "{{query}}" +doc_to_target: "{{label}}" +doc_to_choice: "choices" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/moral_stories/utils.py b/lm_eval/tasks/moral_stories/utils.py new file mode 100644 index 00000000..2e996b74 --- /dev/null +++ b/lm_eval/tasks/moral_stories/utils.py @@ -0,0 +1,21 @@ +import datasets + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + ctx = ( + doc["norm"].capitalize() + + " " + + doc["situation"].capitalize() + + " " + + doc["intention"].capitalize() + ) + choices = [doc["moral_action"], doc["immoral_action"]] + out_doc = { + "query": ctx, + "choices": choices, + "label": 0, + } + return out_doc + + return dataset.map(_process_doc) -- GitLab From 42f791317d3e96dc48faf4f9d7590e38f47e6d24 Mon Sep 17 00:00:00 2001 From: Nicky Pochinkov <52249105+nickypro@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:03:18 +0000 Subject: [PATCH 
12/32] add TransformerLens example (#2651) * add TransformerLens example Many people use TransformerLens to do interpretability and interventions on models, and then need to test the model. Here is a simple script that allows one to pass in the TransformerLens model and run evaluations on it. * Ran pre-commit checks --- examples/transformer-lens.py | 59 ++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 examples/transformer-lens.py diff --git a/examples/transformer-lens.py b/examples/transformer-lens.py new file mode 100644 index 00000000..e03576b1 --- /dev/null +++ b/examples/transformer-lens.py @@ -0,0 +1,59 @@ +import warnings + +import torch +import torch.nn as nn +from transformer_lens import HookedTransformer +from transformers import AutoConfig + +from lm_eval import evaluator +from lm_eval.models.huggingface import HFLM + + +def evaluate_lm_eval(lens_model: HookedTransformer, tasks: list[str], **kwargs): + class HFLikeModelAdapter(nn.Module): + """Adapts HookedTransformer to match the HuggingFace interface expected by lm-eval""" + + def __init__(self, model: HookedTransformer): + super().__init__() + self.model = model + self.tokenizer = model.tokenizer + self.config = AutoConfig.from_pretrained(model.cfg.tokenizer_name) + self.device = model.cfg.device + self.tie_weights = lambda: self + + def forward(self, input_ids=None, attention_mask=None, **kwargs): + output = self.model(input_ids, attention_mask=attention_mask, **kwargs) + # Make sure output has the expected .logits attribute + if not hasattr(output, "logits"): + if isinstance(output, torch.Tensor): + output.logits = output + return output + + # Only delegate specific attributes we know we need + def to(self, *args, **kwargs): + return self.model.to(*args, **kwargs) + + def eval(self): + self.model.eval() + return self + + def train(self, mode=True): + self.model.train(mode) + return self + + model = HFLikeModelAdapter(lens_model) + 
warnings.filterwarnings("ignore", message="Failed to get model SHA for") + results = evaluator.simple_evaluate( + model=HFLM(pretrained=model, tokenizer=model.tokenizer), + tasks=tasks, + verbosity="WARNING", + **kwargs, + ) + return results + + +if __name__ == "__main__": + # Load base model + model = HookedTransformer.from_pretrained("pythia-70m") + res = evaluate_lm_eval(model, tasks=["arc_easy"]) + print(res["results"]) -- GitLab From 96e499baf4fb9a382d7fa3f0bc533d3d20ea72fc Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:04:20 +0000 Subject: [PATCH 13/32] fix multiple input chat tempalte (#2576) * feat: drop Python 3.8 support * feat: drop Python 3.8 tests * pre-commit * handle chat_template for multiple iput --- lm_eval/api/task.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index f14f36e8..dc44150e 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -456,6 +456,7 @@ class Task(abc.ABC): ctx=fewshot_ctx, metadata=(self.config["task"], doc_id, self.config.repeats), apply_chat_template=apply_chat_template, + chat_template=chat_template, ) if not isinstance(inst, list): @@ -1098,6 +1099,8 @@ class ConfigurableTask(Task): if apply_chat_template: if self.multiple_input: # TODO: append prefill? 
+ if not labeled_examples: + return "" return chat_template(labeled_examples) if isinstance(example, str): self.append_target_question( @@ -1350,6 +1353,7 @@ class ConfigurableTask(Task): self, doc: dict, ctx: str, **kwargs ) -> Union[List[Instance], Instance]: apply_chat_template = kwargs.pop("apply_chat_template", False) + chat_template: Callable | None = kwargs.pop("chat_template", None) aux_arguments = None @@ -1364,9 +1368,20 @@ class ConfigurableTask(Task): target_delimiter = "" if self.multiple_input: # If there are multiple inputs, choices are placed in the ctx + # apply chat_template to choices if apply_chat_template cont = self.doc_to_target(doc) + arguments = [ - (ctx + choice, f"{target_delimiter}{cont}") for choice in choices + ( + ctx + + ( + chat_template([{"role": "user", "content": choice}]) + if apply_chat_template + else choice + ), + f"{target_delimiter}{cont}", + ) + for choice in choices ] else: # Otherwise they are placed in the continuation -- GitLab From 94344a61da1f79536542454cc9c26f99c36b978f Mon Sep 17 00:00:00 2001 From: Seungwoo Ryu Date: Wed, 29 Jan 2025 04:56:34 +0900 Subject: [PATCH 14/32] Add Aggregation for Kobest Benchmark (#2446) Co-authored-by: Baber --- lm_eval/tasks/kobest/_kobest.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 lm_eval/tasks/kobest/_kobest.yaml diff --git a/lm_eval/tasks/kobest/_kobest.yaml b/lm_eval/tasks/kobest/_kobest.yaml new file mode 100644 index 00000000..cf23f664 --- /dev/null +++ b/lm_eval/tasks/kobest/_kobest.yaml @@ -0,0 +1,19 @@ +group: kobest +task: + - kobest_boolq + - kobest_copa + - kobest_hellaswag + - kobest_sentineg + - kobest_wic +aggregate_metric_list: + - metric: acc + aggregation: mean + weight_by_size: true + - metric: acc_norm + aggregation: mean + weight_by_size: true + - metric: f1 + aggregation: mean + weight_by_size: true +metadata: + version: 1.0 -- GitLab From 4b4b036318f6afd880969ad50be81a20768cfeb5 Mon Sep 17 00:00:00 2001 From: Baber Abbasi 
<92168766+baberabb@users.noreply.github.com> Date: Tue, 28 Jan 2025 21:11:25 +0000 Subject: [PATCH 15/32] update pre-commit (#2660) * nit * update pre-commit --- .pre-commit-config.yaml | 4 ++-- lm_eval/tasks/moral_stories/moral_stories.yaml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3b5da239..a2465d0e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: mixed-line-ending args: [--fix=lf] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.2 + rev: v0.9.3 hooks: # Run the linter. - id: ruff @@ -38,7 +38,7 @@ repos: # Run the formatter. - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.1 hooks: - id: codespell exclude: > diff --git a/lm_eval/tasks/moral_stories/moral_stories.yaml b/lm_eval/tasks/moral_stories/moral_stories.yaml index 0d8e3d99..46f94b9c 100644 --- a/lm_eval/tasks/moral_stories/moral_stories.yaml +++ b/lm_eval/tasks/moral_stories/moral_stories.yaml @@ -1,5 +1,3 @@ -tag: - - moral_stories task: moral_stories dataset_path: demelin/moral_stories dataset_name: full -- GitLab From fe9c5707f58f41db0539229c2d55f9c164e98260 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:45:57 +0000 Subject: [PATCH 16/32] remove `group` from bigbench task configs (#2663) * remove group from task configs * add tags * update readme --- lm_eval/tasks/bigbench/README.md | 6 ++++++ lm_eval/tasks/bigbench/generate_until_template_yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice_template_a_yaml | 2 +- lm_eval/tasks/bigbench/multiple_choice_template_b_yaml | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/bigbench/README.md b/lm_eval/tasks/bigbench/README.md index be680eac..268f75b6 100644 --- a/lm_eval/tasks/bigbench/README.md +++ b/lm_eval/tasks/bigbench/README.md @@ -30,6 +30,12 @@ Homepage: 
https://github.com/google/BIG-bench * `group_name`: `Short description` +#### Tags + +* `bigbench_generate_until` +* `bigbench_multiple_choice_a` +* `bigbench_multiple_choice_b` + #### Tasks * `task_name`: `1-sentence description of what this particular task does` diff --git a/lm_eval/tasks/bigbench/generate_until_template_yaml b/lm_eval/tasks/bigbench/generate_until_template_yaml index 25593a4e..c8c30600 100644 --- a/lm_eval/tasks/bigbench/generate_until_template_yaml +++ b/lm_eval/tasks/bigbench/generate_until_template_yaml @@ -1,4 +1,4 @@ -group: bigbench_generate_until +tag: bigbench_generate_until dataset_path: hails/bigbench output_type: generate_until dataset_kwargs: diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml index 4b5f9e89..de210a41 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_a_yaml @@ -1,4 +1,4 @@ -group: bigbench_multiple_choice +tag: bigbench_multiple_choice_a dataset_path: hails/bigbench dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods diff --git a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml index 2900103e..dc695c98 100644 --- a/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml +++ b/lm_eval/tasks/bigbench/multiple_choice_template_b_yaml @@ -1,4 +1,4 @@ -group: bigbench_multiple_choice +tag: bigbench_multiple_choice_b dataset_path: hails/bigbench dataset_kwargs: # num_shots: 0 # TODO: num of shots for `bigbench` HF dataset should be controlled through this, not through the typical methods -- GitLab From 1208afd34ce132e598fcd7e832762630a35d01c6 Mon Sep 17 00:00:00 2001 From: Irina Proskurina <72871167+upunaprosk@users.noreply.github.com> Date: Wed, 29 Jan 2025 16:52:14 +0100 Subject: [PATCH 17/32] Add Histoires Morales task (#2662) 
* Add Histoires Morales task * Histoires Morales task: fix mixed line endings * Histoires Morales task: fix mixed line endings * Remove tag for a single task * Add some MT for Histoires Morales --- lm_eval/tasks/README.md | 265 +++++++++--------- lm_eval/tasks/histoires_morales/README.md | 62 ++++ .../histoires_morales/histoires_morales.yaml | 17 ++ lm_eval/tasks/histoires_morales/utils.py | 21 ++ 4 files changed, 233 insertions(+), 132 deletions(-) create mode 100644 lm_eval/tasks/histoires_morales/README.md create mode 100644 lm_eval/tasks/histoires_morales/histoires_morales.yaml create mode 100644 lm_eval/tasks/histoires_morales/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 618f2c6e..4f14e9ec 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -5,137 +5,138 @@ For more information, including a full list of task names and their precise meanings or sources, follow the links provided to the individual README.md files for each subfolder. -| Task Family | Description | Language(s) | -|-------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| -| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | -| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | -| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | -| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. 
| English | -| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | -| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. | Arabic | -| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | -| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | -| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | -| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | -| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | -| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | -| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | -| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. 
| English, German | -| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | -| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | -| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | -| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | -| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | -| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | -| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. | Chinese | -| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | -| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | -| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | -| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | -| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | -| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | -| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | -| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. 
| English | -| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | -| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | -| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | -| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | -| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | -| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | -| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | -| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | -| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | -| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | -| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | -| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | -| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | -| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | -| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. 
| Spanish, English | -| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | -| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | -| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | -| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. | Korean (Some MT), English (Some MT) | -| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | -| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | -| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | -| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | -| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | -| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | -| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | -| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | -| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | -| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | -| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. 
This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | +| Task Family | Description | Language(s) | +|--------------------------------------------------------------------------|-------------|-------------------------------------------------------------------------------------------------------------------------------| +| [aclue](aclue/README.md) | Tasks focusing on ancient Chinese language understanding and cultural aspects. | Ancient Chinese | +| [aexams](aexams/README.md) | Tasks in Arabic related to various academic exams covering a range of subjects. | Arabic | +| [agieval](agieval/README.md) | Tasks involving historical data or questions related to history and historical texts. | English, Chinese | +| [anli](anli/README.md) | Adversarial natural language inference tasks designed to test model robustness. | English | +| [arabic_leaderboard_complete](arabic_leaderboard_complete/README.md) | A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabic_leaderboard_light](arabic_leaderboard_light/README.md) | A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated. | Arabic (Some MT) | +| [arabicmmlu](arabicmmlu/README.md) | Localized Arabic version of MMLU with multiple-choice questions from 40 subjects. 
| Arabic | +| [AraDICE](aradice/README.md) | A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs). | Arabic | +| [arc](arc/README.md) | Tasks involving complex reasoning over a diverse set of questions. | English | +| [arithmetic](arithmetic/README.md) | Tasks involving numerical computations and arithmetic reasoning. | English | +| [asdiv](asdiv/README.md) | Tasks involving arithmetic and mathematical reasoning challenges. | English | +| [babi](babi/README.md) | Tasks designed as question and answering challenges based on simulated stories. | English | +| [basque_bench](basque_bench/README.md) | Collection of tasks in Basque encompassing various evaluation areas. | Basque | +| [basqueglue](basqueglue/README.md) | Tasks designed to evaluate language understanding in Basque language. | Basque | +| [bbh](bbh/README.md) | Tasks focused on deep semantic understanding through hypothesization and reasoning. | English, German | +| [belebele](belebele/README.md) | Language understanding tasks in a variety of languages and scripts. | Multiple (122 languages) | +| benchmarks | General benchmarking tasks that test a wide range of language understanding capabilities. | | +| [bertaqa](bertaqa/README.md) | Local Basque cultural trivia QA tests in English and Basque languages. | English, Basque, Basque (MT) | +| [bigbench](bigbench/README.md) | Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models. | Multiple | +| [blimp](blimp/README.md) | Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities. | English | +| [catalan_bench](catalan_bench/README.md) | Collection of tasks in Catalan encompassing various evaluation areas. | Catalan | +| [ceval](ceval/README.md) | Tasks that evaluate language understanding and reasoning in an educational context. 
| Chinese | +| [cmmlu](cmmlu/README.md) | Multi-subject multiple choice question tasks for comprehensive academic assessment. | Chinese | +| code_x_glue | Tasks that involve understanding and generating code across multiple programming languages. | Go, Java, JS, PHP, Python, Ruby | +| [commonsense_qa](commonsense_qa/README.md) | CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge. | English | +| [copal_id](copal_id/README.md) | Indonesian causal commonsense reasoning dataset that captures local nuances. | Indonesian | +| [coqa](coqa/README.md) | Conversational question answering tasks to test dialog understanding. | English | +| [crows_pairs](crows_pairs/README.md) | Tasks designed to test model biases in various sociodemographic groups. | English, French | +| csatqa | Tasks related to SAT and other standardized testing questions for academic assessment. | Korean | +| [drop](drop/README.md) | Tasks requiring numerical reasoning, reading comprehension, and question answering. | English | +| [eq_bench](eq_bench/README.md) | Tasks focused on equality and ethics in question answering and decision-making. | English | +| [eus_exams](eus_exams/README.md) | Tasks based on various professional and academic exams in the Basque language. | Basque | +| [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | +| [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. | Basque | +| [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | +| [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. 
| English | +| [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | +| [galician_bench](galician_bench/README.md) | Collection of tasks in Galician encompassing various evaluation areas. | Galician | +| [global_mmlu](global_mmlu/README.md) | Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits. | Multiple (15 languages) | +| [glue](glue/README.md) | General Language Understanding Evaluation benchmark to test broad language abilities. | English | +| [gpqa](gpqa/README.md) | Tasks designed for general public question answering and knowledge verification. | English | +| [gsm8k](gsm8k/README.md) | A benchmark of grade school math problems aimed at evaluating reasoning capabilities. | English | +| [haerae](haerae/README.md) | Tasks focused on assessing detailed factual and historical knowledge. | Korean | +| [headqa](headqa/README.md) | A high-level education-based question answering dataset to test specialized knowledge. | Spanish, English | +| [hellaswag](hellaswag/README.md) | Tasks to predict the ending of stories or scenarios, testing comprehension and creativity. | English | +| [hendrycks_ethics](hendrycks_ethics/README.md) | Tasks designed to evaluate the ethical reasoning capabilities of models. | English | +| [hendrycks_math](hendrycks_math/README.md) | Mathematical problem-solving tasks to test numerical reasoning and problem-solving. | English | +| [histoires_morales](histoires_morales/README.md) | A dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | French (Some MT) | +| [hrm8k](hrm8k/README.md) | A challenging bilingual math reasoning benchmark for Korean and English. 
| Korean (Some MT), English (Some MT) | +| [humaneval](humaneval/README.md) | Code generation task that measure functional correctness for synthesizing programs from docstrings. | Python | +| [ifeval](ifeval/README.md) | Interactive fiction evaluation tasks for narrative understanding and reasoning. | English | +| [inverse_scaling](inverse_scaling/README.md) | Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse. | English | +| [japanese_leaderboard](japanese_leaderboard/README.md) | Japanese language understanding tasks to benchmark model performance on various linguistic aspects. | Japanese | +| [kbl](kbl/README.md) | Korean Benchmark for Legal Language Understanding. | Korean | +| [kmmlu](kmmlu/README.md) | Knowledge-based multi-subject multiple choice questions for academic evaluation. | Korean | +| [kobest](kobest/README.md) | A collection of tasks designed to evaluate understanding in Korean language. | Korean | +| [kormedmcqa](kormedmcqa/README.md) | Medical question answering tasks in Korean to test specialized domain knowledge. | Korean | +| [lambada](lambada/README.md) | Tasks designed to predict the endings of text passages, testing language prediction skills. | English | +| [lambada_cloze](lambada_cloze/README.md) | Cloze-style LAMBADA dataset. | English | +| [lambada_multilingual](lambada_multilingual/README.md) | Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`. | German, English, Spanish, French, Italian | | [lambada_multilingual_stablelm](lambada_multilingual_stablelm/README.md) | Multilingual LAMBADA dataset. Users should prefer evaluating on this version of the multilingual dataset instead of on `lambada_multilingual`. 
| German, English, Spanish, French, Italian, Dutch, Portuguese | -| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | -| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | -| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | -| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | -| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | -| [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | -| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | -| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | -| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | -| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | -| medqa | Multiple choice question answering based on the United States Medical License Exams. | | -| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. 
| Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | -| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | -| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | -| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | -| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | -| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | -| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | -| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English -| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | -| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | -| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | -| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | -| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (34 languages) **Machine Translated.** | +| [leaderboard](leaderboard/README.md) | Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time | English | +| [lingoly](lingoly/README.md) | Challenging logical reasoning benchmark in low-resource languages with controls for memorization | English, Multilingual | +| [logiqa](logiqa/README.md) | Logical reasoning tasks requiring advanced inference and deduction. | English, Chinese | +| [logiqa2](logiqa2/README.md) | Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination. | English, Chinese | +| [mathqa](mathqa/README.md) | Question answering tasks involving mathematical reasoning and problem-solving. | English | +| [mbpp](mbpp/README.md) | A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions. | Python | +| [mc_taco](mc_taco/README.md) | Question-answer pairs that require temporal commonsense comprehension. | English | +| [med_concepts_qa](med_concepts_qa/README.md) | Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept. | English | +| [metabench](metabench/README.md) | Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait. | English | +| medmcqa | Medical multiple choice questions assessing detailed medical knowledge. | English | +| medqa | Multiple choice question answering based on the United States Medical License Exams. | | +| [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | +| [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. 
| English | +| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | +| [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | +| [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | +| model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | +| [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English +| [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English | +| [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English | +| [okapi/arc_multilingual](okapi/arc_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (31 languages) **Machine Translated.** | +| [okapi/hellaswag_multilingual](okapi/hellaswag_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (30 languages) **Machine Translated.** | +| okapi/mmlu_multilingual | Tasks that involve reading comprehension and information retrieval challenges. | Multiple (34 languages) **Machine Translated.** | | [okapi/truthfulqa_multilingual](okapi/truthfulqa_multilingual/README.md) | Tasks that involve reading comprehension and information retrieval challenges. 
| Multiple (31 languages) **Machine Translated.** | -| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | -| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | -| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | -| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | -| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. | English | -| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | -| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | -| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | -| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | -| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | -| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | -| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | -| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. 
| English | -| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | -| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | -| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | -| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | -| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | -| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | -| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. | English | -| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | -| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | -| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | -| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | -| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | -| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | -| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | -| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. 
| English | -| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | -| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | -| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | -| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | -| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. | English | -| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | -| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | -| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | -| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | -| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | -| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | -| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. 
| English | -| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | -| [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | -| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | -| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | -| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. | Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | -| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | +| [openbookqa](openbookqa/README.md) | Open-book question answering tasks that require external knowledge and reasoning. | English | +| [paloma](paloma/README.md) | Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit. | English | +| [paws-x](paws-x/README.md) | Paraphrase Adversaries from Word Scrambling, focusing on cross-lingual capabilities. | English, French, Spanish, German, Chinese, Japanese, Korean | +| [pile](pile/README.md) | Open source language modelling data set that consists of 22 smaller, high-quality datasets. | English | +| [pile_10k](pile_10k/README.md) | The first 10K elements of The Pile, useful for debugging models trained on it. 
| English | +| [piqa](piqa/README.md) | Physical Interaction Question Answering tasks to test physical commonsense reasoning. | English | +| [polemo2](polemo2/README.md) | Sentiment analysis and emotion detection tasks based on Polish language data. | Polish | +| [portuguese_bench](portuguese_bench/README.md) | Collection of tasks in European Portuguese encompassing various evaluation areas. | Portuguese | +| [prost](prost/README.md) | Tasks requiring understanding of professional standards and ethics in various domains. | English | +| [pubmedqa](pubmedqa/README.md) | Question answering tasks based on PubMed research articles for biomedical understanding. | English | +| [qa4mre](qa4mre/README.md) | Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning. | English | +| [qasper](qasper/README.md) | Question Answering dataset based on academic papers, testing in-depth scientific knowledge. | English | +| [race](race/README.md) | Reading comprehension assessment tasks based on English exams in China. | English | +| realtoxicityprompts | Tasks to evaluate language models for generating text with potential toxicity. | | +| [sciq](sciq/README.md) | Science Question Answering tasks to assess understanding of scientific concepts. | English | +| [score](score/README.md) | Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH) | English | +| [scrolls](scrolls/README.md) | Tasks that involve long-form reading comprehension across various domains. | English | +| [siqa](siqa/README.md) | Social Interaction Question Answering to evaluate common sense and social reasoning. | English | +| [spanish_bench](spanish_bench/README.md) | Collection of tasks in Spanish encompassing various evaluation areas. | Spanish | +| [squad_completion](squad_completion/README.md) | A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs. 
| English | +| [squadv2](squadv2/README.md) | Stanford Question Answering Dataset version 2, a reading comprehension benchmark. | English | +| [storycloze](storycloze/README.md) | Tasks to predict story endings, focusing on narrative logic and coherence. | English | +| [super_glue](super_glue/README.md) | A suite of challenging tasks designed to test a range of language understanding skills. | English | +| [swag](swag/README.md) | Situations With Adversarial Generations, predicting the next event in videos. | English | +| [swde](swde/README.md) | Information extraction tasks from semi-structured web pages. | English | +| [tinyBenchmarks](tinyBenchmarks/README.md) | Evaluation of large language models with fewer examples using tiny versions of popular benchmarks. | English | +| [tmmluplus](tmmluplus/README.md) | An extended set of tasks under the TMMLU framework for broader academic assessments. | Traditional Chinese | +| [toxigen](toxigen/README.md) | Tasks designed to evaluate language models on their propensity to generate toxic content. | English | +| [translation](translation/README.md) | Tasks focused on evaluating the language translation capabilities of models. | Arabic, English, Spanish, Basque, Hindi, Indonesian, Burmese, Russian, Swahili, Telugu, Chinese | +| [triviaqa](triviaqa/README.md) | A large-scale dataset for trivia question answering to test general knowledge. | English | +| [truthfulqa](truthfulqa/README.md) | A QA task aimed at evaluating the truthfulness and factual accuracy of model responses. | English | +| [turkishmmlu](turkishmmlu/README.md) | A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams. | Turkish | +| [unitxt](unitxt/README.md) | A number of tasks implemented using the unitxt library for flexible, shareable, and reusable data preparation and evaluation for generative AI. 
| English | +| [unscramble](unscramble/README.md) | Tasks involving the rearrangement of scrambled sentences to test syntactic understanding. | English | +| [webqs](webqs/README.md) | Web-based question answering tasks designed to evaluate internet search and retrieval. | English | +| [wikitext](wikitext/README.md) | Tasks based on text from Wikipedia articles to assess language modeling and generation. | English | +| [winogrande](winogrande/README.md) | A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge. | English | +| [wmdp](wmdp/README.md) | A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions. | English | +| [wmt2016](wmt2016/README.md) | Tasks from the WMT 2016 shared task, focusing on translation between multiple languages. | English, Czech, German, Finnish, Russian, Romanian, Turkish | +| [wsc273](wsc273/README.md) | The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution. | English | +| [xcopa](xcopa/README.md) | Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages. | Estonian, Haitian, Indonesian, Italian, Quechua, Swahili, Tamil, Thai, Turkish, Vietnamese, Chinese | +| [xnli](xnli/README.md) | Cross-Lingual Natural Language Inference to test understanding across different languages. | Arabic, Bulgarian, German, Greek, English, Spanish, French, Hindi, Russian, Swahili, Thai, Turkish, Urdu, Vietnamese, Chinese | +| [xnli_eu](xnli_eu/README.md) | Cross-lingual Natural Language Inference tasks in Basque. | Basque | +| [xquad](xquad/README.md) | Cross-lingual Question Answering Dataset in multiple languages. | Arabic, German, Greek, English, Spanish, Hindi, Romanian, Russian, Thai, Turkish, Vietnamese, Chinese | +| [xstorycloze](xstorycloze/README.md) | Cross-lingual narrative understanding tasks to predict story endings in multiple languages. 
| Russian, Simplified Chinese, Spanish, Arabic, Hindi, Indonesian, Telugu, Swahili, Basque, Burmese | +| [xwinograd](xwinograd/README.md) | Cross-lingual Winograd schema tasks for coreference resolution in multiple languages. | English, French, Japanese, Portuguese, Russian, Chinese | diff --git a/lm_eval/tasks/histoires_morales/README.md b/lm_eval/tasks/histoires_morales/README.md new file mode 100644 index 00000000..cbd14ec3 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/README.md @@ -0,0 +1,62 @@ +# Histoires Morales + +### Paper + +Title: `Histoires Morales: A French Dataset for Assessing Moral Alignment` + +Abstract: `https://arxiv.org/pdf/2501.17117` + +⚖ Histoires Morales is the first dataset for moral model alignment evaluation in French. It consists of narratives describing normative and norm-divergent actions taken by individuals to achieve certain intentions in concrete situations, along with their respective consequences. +Each of the 12,000 stories (histoires) follows the same seven-sentence structure as the Moral Stories dataset: + +Context: + +1. Norm: A guideline for social conduct generally observed by most people in everyday situations. +2. Situation: The setting of the story, introducing participants and describing their environment. +3. Intention: A reasonable goal that one of the story participants (the actor) wants to achieve. + +Normative path: +4. Normative action: An action by the actor that fulfills the intention while observing the norm. +5. Normative consequence: A possible effect of the normative action on the actor’s environment. + +Norm-divergent path: +6. Divergent action: An action by the actor that fulfills the intention but diverges from the norm. +7. Divergent consequence: A possible effect of the divergent action on the actor’s environment. + +Histoires Morales is adapted to French from the widely used Moral Stories dataset. +We translated the Moral Stories dataset and refined these translations through manual annotations. 
+See paper for more details. + +Homepage: `https://huggingface.co/datasets/LabHC/histoires_morales` + + +### Citation + +Coming soon (accepted to NAACL 2025) + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tags + +No tags, since there is a single task. + +#### Tasks + +* `histoires_morales.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [ ] Have you referenced the original paper that introduced the task? + * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/histoires_morales/histoires_morales.yaml b/lm_eval/tasks/histoires_morales/histoires_morales.yaml new file mode 100644 index 00000000..88fcc402 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/histoires_morales.yaml @@ -0,0 +1,17 @@ +task: histoires_morales +dataset_path: LabHC/histoires_morales +output_type: multiple_choice +test_split: train +process_docs: !function utils.process_docs +doc_to_text: "{{query}}" +doc_to_target: "{{label}}" +doc_to_choice: "choices" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/histoires_morales/utils.py b/lm_eval/tasks/histoires_morales/utils.py new file mode 100644 index 00000000..2e996b74 --- /dev/null +++ b/lm_eval/tasks/histoires_morales/utils.py @@ -0,0 +1,21 @@ +import datasets + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + ctx = ( + doc["norm"].capitalize() + + " " + + doc["situation"].capitalize() + + " " + + doc["intention"].capitalize() + ) + choices = [doc["moral_action"], doc["immoral_action"]] + out_doc = { + "query": ctx, + "choices": choices, + "label": 0, + } + return out_doc + + return dataset.map(_process_doc) -- GitLab From 0bb8406f2ebfe074cf173c333bdcd6cffb17279b Mon Sep 17 00:00:00 2001 From: asgsaeid <43481290+asgsaeid@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:57:48 -0800 Subject: [PATCH 18/32] MMLU Pro Plus (#2366) * mmlu-pro-plus is implemented * README file is updated * Update README.md with new task: MMLU Pro Plus * Update README.md with new task: MMLU Pro Plus * pre-commit * nit --------- Co-authored-by: asgsaeid Co-authored-by: Baber --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/mmlu-pro-plus/README.md | 70 +++++++++++++++++++ .../mmlu-pro-plus/_default_template_yaml | 33 +++++++++ .../tasks/mmlu-pro-plus/_mmlu_pro_plus.yaml | 23 ++++++ 
.../mmlu-pro-plus/mmlu_pro_plus_biology.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_business.yaml | 5 ++ .../mmlu_pro_plus_chemistry.yaml | 5 ++ .../mmlu_pro_plus_computer_science.yaml | 5 ++ .../mmlu_pro_plus_economics.yaml | 5 ++ .../mmlu_pro_plus_engineering.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_health.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_history.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_law.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_math.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_other.yaml | 5 ++ .../mmlu_pro_plus_philosophy.yaml | 5 ++ .../mmlu-pro-plus/mmlu_pro_plus_physics.yaml | 5 ++ .../mmlu_pro_plus_psychology.yaml | 5 ++ lm_eval/tasks/mmlu-pro-plus/utils.py | 63 +++++++++++++++++ 19 files changed, 260 insertions(+) create mode 100644 lm_eval/tasks/mmlu-pro-plus/README.md create mode 100644 lm_eval/tasks/mmlu-pro-plus/_default_template_yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/_mmlu_pro_plus.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_biology.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_business.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_economics.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_engineering.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_health.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_history.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_law.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_math.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_other.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_physics.yaml create mode 100644 lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_psychology.yaml 
create mode 100644 lm_eval/tasks/mmlu-pro-plus/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 4f14e9ec..7a53dce9 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -85,6 +85,7 @@ | [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | | [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | | [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | +| [mmlu-pro-plus](mmlu-pro-plus/README.md) | A new test set for evaluating shortcut learning and higher-order reasoning of LLMs. | English | | [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | | model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | | | [moral_stories](moral_stories/README.md) | A crowd-sourced dataset of structured narratives that describe normative and norm-divergent actions taken by individuals to accomplish certain intentions in concrete situations. | English diff --git a/lm_eval/tasks/mmlu-pro-plus/README.md b/lm_eval/tasks/mmlu-pro-plus/README.md new file mode 100644 index 00000000..6e8e0099 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/README.md @@ -0,0 +1,70 @@ +# mmlu_pro_plus + +### Paper + +Title: `MMLU-Pro+: Evaluating Higher-Order Reasoning and Shortcut Learning in LLMs` + +Abstract: `Existing benchmarks for large language models (LLMs) increasingly struggle to differentiate between +top-performing models, underscoring the need for more challenging evaluation frameworks. 
+We introduce MMLU-Pro+, an enhanced benchmark building upon MMLU-Pro to assess shortcut +learning and higher-order reasoning in LLMs. By incorporating questions with multiple +correct answers across diverse domains, MMLU-Pro+ tests LLMs' ability to engage in complex +reasoning and resist simplistic problem-solving strategies. Our results show that +MMLU-Pro+ maintains MMLU-Pro's difficulty while providing a more rigorous test of +model discrimination, particularly in multi-correct answer scenarios. +We introduce novel metrics like shortcut selection ratio and correct pair identification +ratio, offering deeper insights into model behavior and anchoring bias. +Evaluations of six state-of-the-art LLMs reveal significant performance gaps, +highlighting variations in reasoning abilities and bias susceptibility.` + +Homepage: https://github.com/asgsaeid/mmlu-pro-plus + +### Citation + +```bibtex +@article{taghanaki2024mmlu, + title={MMLU-Pro+: Evaluating Higher-Order Reasoning and Shortcut Learning in LLMs}, + author={Taghanaki, Saeid Asgari and Khani, Aliasgahr and Khasahmadi, Amir}, + journal={arXiv preprint arXiv:2409.02257}, + year={2024} +} +``` + +### Groups and Tasks + +#### Groups + +* `mmlu_pro_plus`: 'All 14 subjects of the mmlu_pro_plus dataset, evaluated following the methodology in mmlu's original implementation' + +#### Tasks + +The following tasks evaluate subjects in the mmlu_pro_plus dataset +- `mmlu_pro_plus_biology` +- `mmlu_pro_plus_business` +- `mmlu_pro_plus_chemistry` +- `mmlu_pro_plus_computer_science` +- `mmlu_pro_plus_economics` +- `mmlu_pro_plus_engineering` +- `mmlu_pro_plus_health` +- `mmlu_pro_plus_history` +- `mmlu_pro_plus_law` +- `mmlu_pro_plus_math` +- `mmlu_pro_plus_other` +- `mmlu_pro_plus_philosophy` +- `mmlu_pro_plus_physics` +- `mmlu_pro_plus_psychology` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? 
+ * [x] Have you referenced the original paper that introduced the task? + * [x] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [x] Have you noted which, if any, published evaluation setups are matched by this variant? + +### Changelog diff --git a/lm_eval/tasks/mmlu-pro-plus/_default_template_yaml b/lm_eval/tasks/mmlu-pro-plus/_default_template_yaml new file mode 100644 index 00000000..7ece65fb --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/_default_template_yaml @@ -0,0 +1,33 @@ +dataset_path: saeidasgari/mmlu-pro-plus +test_split: test +fewshot_split: validation +fewshot_config: + sampler: first_n + doc_to_text: !function utils.fewshot_to_text + doc_to_target: "" +output_type: generate_until +doc_to_text: !function utils.doc_to_text +doc_to_target: answer +filter_list: + - name: "custom-extract" + filter: + - function: "regex" + regex_pattern: 'answer is \(?([ABCDEFGHIJKL])\)?' 
+ # regex_pattern: r".*[aA]nswer:\s*([A-L])", + - function: "take_first" +generation_kwargs: + until: + - "</s>" + - "Q:" + - "<|im_end|>" + do_sample: false + temperature: 0.0 +num_fewshot: 5 +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 1.0 diff --git a/lm_eval/tasks/mmlu-pro-plus/_mmlu_pro_plus.yaml b/lm_eval/tasks/mmlu-pro-plus/_mmlu_pro_plus.yaml new file mode 100644 index 00000000..e92c9a05 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/_mmlu_pro_plus.yaml @@ -0,0 +1,23 @@ +group: mmlu_pro_plus +task: + - mmlu_pro_plus_biology + - mmlu_pro_plus_business + - mmlu_pro_plus_chemistry + - mmlu_pro_plus_computer_science + - mmlu_pro_plus_economics + - mmlu_pro_plus_engineering + - mmlu_pro_plus_health + - mmlu_pro_plus_history + - mmlu_pro_plus_law + - mmlu_pro_plus_math + - mmlu_pro_plus_other + - mmlu_pro_plus_philosophy + - mmlu_pro_plus_physics + - mmlu_pro_plus_psychology +aggregate_metric_list: + - aggregation: mean + metric: exact_match + weight_by_size: true + filter_list: custom-extract +metadata: + version: 1.0 diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_biology.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_biology.yaml new file mode 100644 index 00000000..8ec15466 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_biology.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about biology. 
Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_biology" +task_alias: "biology" +process_docs: !function utils.process_biology diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_business.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_business.yaml new file mode 100644 index 00000000..f3488599 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_business.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about business. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_business" +task_alias: "business" +process_docs: !function utils.process_business diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_chemistry.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_chemistry.yaml new file mode 100644 index 00000000..e498bfd8 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_chemistry.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about chemistry. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_chemistry" +task_alias: "chemistry" +process_docs: !function utils.process_chemistry diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_computer_science.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_computer_science.yaml new file mode 100644 index 00000000..b652343b --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_computer_science.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about computer science. 
Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_computer_science" +task_alias: "computer_science" +process_docs: !function utils.process_computer_science diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_economics.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_economics.yaml new file mode 100644 index 00000000..2f7e2d1b --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_economics.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about economics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_economics" +task_alias: "economics" +process_docs: !function utils.process_economics diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_engineering.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_engineering.yaml new file mode 100644 index 00000000..745def5f --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_engineering.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about engineering. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_engineering" +task_alias: "engineering" +process_docs: !function utils.process_engineering diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_health.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_health.yaml new file mode 100644 index 00000000..b8f65c2b --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_health.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about health. 
Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_health" +task_alias: "health" +process_docs: !function utils.process_health diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_history.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_history.yaml new file mode 100644 index 00000000..0b562ae2 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_history.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about history. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_history" +task_alias: "history" +process_docs: !function utils.process_history diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_law.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_law.yaml new file mode 100644 index 00000000..0af0e50b --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_law.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_law" +task_alias: "law" +process_docs: !function utils.process_law diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_math.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_math.yaml new file mode 100644 index 00000000..a1d8c95e --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_math.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about math. 
Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_math" +task_alias: "math" +process_docs: !function utils.process_math diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_other.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_other.yaml new file mode 100644 index 00000000..9a2372ae --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_other.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_other" +task_alias: "other" +process_docs: !function utils.process_other diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_philosophy.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_philosophy.yaml new file mode 100644 index 00000000..c1768cab --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_philosophy.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about philosophy. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_philosophy" +task_alias: "philosophy" +process_docs: !function utils.process_philosophy diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_physics.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_physics.yaml new file mode 100644 index 00000000..8a4b25c0 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_physics.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about physics. 
Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_physics" +task_alias: "physics" +process_docs: !function utils.process_physics diff --git a/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_psychology.yaml b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_psychology.yaml new file mode 100644 index 00000000..497c3f35 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/mmlu_pro_plus_psychology.yaml @@ -0,0 +1,5 @@ +description: "The following are multiple choice questions (with answers) about psychology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n" +include: "_default_template_yaml" +task: "mmlu_pro_plus_psychology" +task_alias: "psychology" +process_docs: !function utils.process_psychology diff --git a/lm_eval/tasks/mmlu-pro-plus/utils.py b/lm_eval/tasks/mmlu-pro-plus/utils.py new file mode 100644 index 00000000..03117be5 --- /dev/null +++ b/lm_eval/tasks/mmlu-pro-plus/utils.py @@ -0,0 +1,63 @@ +from functools import partial + + +choices = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", +] + + +def format_cot_example(example, including_answer=True): + prompt = "Question:\n" + question = example["question"] + options = example["options"] + prompt += question + "\n" + prompt += "Options:\n" + for i, opt in enumerate(options): + prompt += "{}. {}\n".format(choices[i], opt) + if including_answer: + cot_content = example["cot_content"].replace( + "A: Let's think step by step.", "Answer: Let's think step by step." + ) + prompt += cot_content + "\n\n" + else: + prompt += "Answer: Let's think step by step." 
+ return prompt + + +doc_to_text = partial(format_cot_example, including_answer=False) +fewshot_to_text = partial(format_cot_example, including_answer=True) + + +def process_docs(dataset, subject): + return dataset.filter(lambda x: x["category"] == subject) + + +process_biology = partial(process_docs, subject="biology") +process_business = partial(process_docs, subject="business") +process_chemistry = partial(process_docs, subject="chemistry") +process_computer_science = partial(process_docs, subject="computer science") +process_economics = partial(process_docs, subject="economics") +process_engineering = partial(process_docs, subject="engineering") +process_health = partial(process_docs, subject="health") +process_history = partial(process_docs, subject="history") +process_law = partial(process_docs, subject="law") +process_math = partial(process_docs, subject="math") +process_other = partial(process_docs, subject="other") +process_philosophy = partial(process_docs, subject="philosophy") +process_physics = partial(process_docs, subject="physics") +process_psychology = partial(process_docs, subject="psychology") -- GitLab From 144a1e58be73f937f8fecaae886346681d0fa082 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Thu, 6 Feb 2025 06:32:54 +0000 Subject: [PATCH 19/32] fix early return for multuple dict (#2673) --- lm_eval/api/task.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index dc44150e..a1cf5519 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1621,13 +1621,13 @@ class ConfigurableTask(Task): ) except TypeError: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics result_score = self._metric_fn_list[metric]([gold, result]) - if isinstance(result_score, dict): - # TODO: this handles the case where HF evaluate returns a dict. 
- # This allows for multiple metrics to be returned from the same function - for k, v in result_score.items(): - result_dict[k] = v - return result_dict - result_dict[metric] = result_score + if isinstance(result_score, dict): + # TODO: this handles the case where HF evaluate returns a dict. + # This allows for multiple metrics to be returned from the same function + for k, v in result_score.items(): + result_dict[k] = v + else: + result_dict[metric] = result_score else: raise ValueError( f"Passed invalid output_type '{self.OUTPUT_TYPE}' ! Please use one of ", -- GitLab From 53504a969dd66e7191c41747601b684f2d122adf Mon Sep 17 00:00:00 2001 From: Arda Date: Fri, 7 Feb 2025 14:53:12 +0100 Subject: [PATCH 20/32] Turkish mmlu Config Update (#2678) * Added TurkishMMLU to LM Evaluation Harness * Fixed COT name * Fixed COT name * Updated Readme * Fixed Test issues * Completed Scan for changed tasks * Updated Readme * Update README.md * fixup task naming casing + ensure yaml template stubs aren't registered * Fix Regex Pattern for CoT experiments * Fixed multiple choice accuracy --------- Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com> Co-authored-by: haileyschoelkopf --- lm_eval/tasks/turkishmmlu/config/_turkishmmlu_default_yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/turkishmmlu/config/_turkishmmlu_default_yaml b/lm_eval/tasks/turkishmmlu/config/_turkishmmlu_default_yaml index e1dae1a9..e4027f09 100644 --- a/lm_eval/tasks/turkishmmlu/config/_turkishmmlu_default_yaml +++ b/lm_eval/tasks/turkishmmlu/config/_turkishmmlu_default_yaml @@ -9,7 +9,7 @@ fewshot_config: output_type: multiple_choice doc_to_text: "Soru: {{ question.strip() }}\nA. {{ choices[0] }}\nB. {{ choices[1] }}\nC. {{ choices[2] }}\nD. {{ choices[3] }}\nE. 
{{ choices[4] }}\nCevap:" doc_to_choice: ["A", "B", "C", "D", "E"] -doc_to_target: "{{['A', 'B', 'C', 'D', 'E'].index(answer)}}" +doc_to_target: "{{ answer.strip() }}" metric_list: - metric: acc aggregation: mean -- GitLab From 8fe3435a219a9d1e55fbc3ccfb19db0f9c71f8fc Mon Sep 17 00:00:00 2001 From: omahs <73983677+omahs@users.noreply.github.com> Date: Fri, 7 Feb 2025 18:01:26 +0100 Subject: [PATCH 21/32] Fix typos (#2679) * fix typo * fix typos * fix typos --- docs/interface.md | 2 +- docs/new_task_guide.md | 10 +++++----- docs/task_guide.md | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/interface.md b/docs/interface.md index cea1aab0..13c7a289 100644 --- a/docs/interface.md +++ b/docs/interface.md @@ -8,7 +8,7 @@ A majority of users run the library by cloning it from Github, installing the pa Equivalently, running the library can be done via the `lm-eval` entrypoint at the command line. -This mode supports a number of command-line arguments, the details of which can be also be seen via running with `-h` or `--help`: +This mode supports a number of command-line arguments, the details of which can also be seen via running with `-h` or `--help`: - `--model` : Selects which model type or provider is evaluated. Must be a string corresponding to the name of the model type/provider being used. See [the main README](https://github.com/EleutherAI/lm-evaluation-harness/tree/main#model-apis-and-inference-servers) for a full list of enabled model names and supported libraries or APIs. diff --git a/docs/new_task_guide.md b/docs/new_task_guide.md index a822a887..3fc24f19 100644 --- a/docs/new_task_guide.md +++ b/docs/new_task_guide.md @@ -143,7 +143,7 @@ The next thing we need to do is decide what format to use when presenting the da To write a prompt, users will use `doc_to_text`, `doc_to_target`, and `doc_to_choice` (Optional when certain conditions are met). 
-`doc_to_text` defines the input string a model will be given while `doc_to_target` and `doc_to_choice` will be used to generate the target text. `doc_to_target` can be either a text string that refers to the target string or an integer that refers to the index of the correct label. When it is set as an index, `doc_to_choice` must be also be set with the appropriate list of possible choice strings. +`doc_to_text` defines the input string a model will be given while `doc_to_target` and `doc_to_choice` will be used to generate the target text. `doc_to_target` can be either a text string that refers to the target string or an integer that refers to the index of the correct label. When it is set as an index, `doc_to_choice` must also be set with the appropriate list of possible choice strings. ### Basic prompts @@ -172,7 +172,7 @@ doc_to_choice: choices We support the [Jinja 2](https://jinja.palletsprojects.com/en/3.1.x/) templating language for writing prompts. In practice, this means you can take your dataset's columns and do many basic string manipulations to place each document into prompted format. -Take for example the dataset `super_glue/boolq`. As input, we'd like to use the features `passage` and `question` and string them together so that for a a sample line `doc`, the model sees something the format of: +Take for example the dataset `super_glue/boolq`. As input, we'd like to use the features `passage` and `question` and string them together so that for a sample line `doc`, the model sees something in the format of: ``` doc["passage"] Question: doc["question"]? @@ -284,7 +284,7 @@ As a heuristic check: * Do you expect to compute metrics after applying multiple such processing steps on your model outputs? * Does your task rely on metrics that need a custom implementation? -For more detail on the task system and advanced features, see [`docs/task_guide.md`](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md) . 
If none of the above sound like they apply to your task, it's time to continue onto checking your task performance! +For more detail on the task system and advanced features, see [`docs/task_guide.md`](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md). If none of the above sounds like they apply to your task, it's time to continue onto checking your task performance! ### Task name + tags (registering a task) @@ -383,7 +383,7 @@ task: ### Configuring python classes -There can occasions when yaml-based tasks cannot accommodate how a task is handled. LM-Eval supports the manually implementing tasks as was previously done before `0.4.x`. To register the task, you can simply make a yaml with the name of the task in `task` and the class object in `class` using the `!function` prefix. +There can be occasions when yaml-based tasks cannot accommodate how a task is handled. LM-Eval supports the manually implementing tasks as was previously done before `0.4.x`. To register the task, you can simply make a yaml with the name of the task in `task` and the class object in `class` using the `!function` prefix. ```yaml task: squadv2 @@ -486,7 +486,7 @@ If other tasks on this dataset are already supported: It is recommended to include a filled-out copy of this checklist in the README.md for the subfolder you are creating, if you have created a new subfolder in `lm_eval/tasks`. 
-**Finally, please add a short description of your task(s), along with a link to its subfolder in lm_eval/tasks , to [`lm_eval/tasks/README.md`](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/README.md) so that users can discover your task in the library, and follow the link to your README for more information about the variants supported, their task names, and the original source of the dataset and/or evaluation setup.** +**Finally, please add a short description of your task(s), along with a link to its subfolder in lm_eval/tasks, to [`lm_eval/tasks/README.md`](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/README.md) so that users can discover your task in the library, and follow the link to your README for more information about the variants supported, their task names, and the original source of the dataset and/or evaluation setup.** ## Submitting your task diff --git a/docs/task_guide.md b/docs/task_guide.md index 23fbd1b9..4e055044 100644 --- a/docs/task_guide.md +++ b/docs/task_guide.md @@ -6,7 +6,7 @@ These YAML configuration files, along with the current codebase commit hash, are While adding a standard evaluation task on a new dataset can be occasionally as simple as swapping out a Hugging Face dataset path in an existing file, more specialized evaluation setups also exist. Here we'll provide a crash course on the more advanced logic implementable in YAML form available to users. -If your intended task relies on features beyond what are described in this guide, we'd love to hear about it! Feel free to open an issue describing the scenario on Github, create a PR to the project with a proposed implementation, or ask in the `#lm-thunderdome` channel on the EleutherAI discord. +If your intended task relies on features beyond what is described in this guide, we'd love to hear about it! 
Feel free to open an issue describing the scenario on Github, create a PR to the project with a proposed implementation, or ask in the `#lm-thunderdome` channel on the EleutherAI discord. ## Configurations @@ -47,7 +47,7 @@ Scoring details: - **metric_list** (`str`, *optional*, defaults to None) — A list of metrics to use for evaluation. See docs for expected format. - **output_type** (`str`, *optional*, defaults to "generate_until") — Selects the type of model output for the given task. Options are `generate_until`, `loglikelihood`, `loglikelihood_rolling`, and `multiple_choice`. - **generation_kwargs** (`dict`, *optional*) — Auxiliary arguments for the `generate` function from HF transformers library. Advanced keyword arguments may not be supported for non-HF LM classes. -- **repeats** (`int`, *optional*, defaults to 1) — Number of repeated runs through model for each sample. can be used for cases such as self-consistency. +- **repeats** (`int`, *optional*, defaults to 1) — Number of repeated runs through model for each sample. Can be used for cases such as self-consistency. - **filter_list** (`Union[str, list]`, *optional*) — List of filters to postprocess model outputs. See below for further detail on the filter API. - **should_decontaminate** (`bool`, *optional*, defaults to False) - Whether to decontaminate or not. - **doc_to_decontamination_query** (`str`, *optional*) — Query for decontamination if `should_decontaminate` is True. If `should_decontaminate` is True but `doc_to_decontamination_query` is `None`, `doc_to_decontamination_query` will follow `doc_to_text`. @@ -185,7 +185,7 @@ The prior implementation method of new tasks was to subclass `Task`. While we in ## Including a Base YAML -You can base a YAML on another YAML file as a template. This can be handy when you need to just change the prompt for `doc_to_text` but keep the rest the same or change `filters` to compare which is better. 
Simply use `include` in the YAML file and write the name of the template you want to base from. This assumes that the base temeplate is in the same directory. Otherwise, You will need to define the full path. +You can base a YAML on another YAML file as a template. This can be handy when you need to just change the prompt for `doc_to_text` but keep the rest the same or change `filters` to compare which is better. Simply use `include` in the YAML file and write the name of the template you want to base from. This assumes that the base template is in the same directory. Otherwise, You will need to define the full path. ``` include: ... @@ -297,7 +297,7 @@ Tasks using complex filtering: # Group Configuration -When evaluating a language model, it's is not unusual to test across a number of tasks that may not be related to one another in order to assess a variety of capabilities. To this end, it may be combursome to have to list the set of tasks or add a new group name to each yaml of each individual task. +When evaluating a language model, it is not unusual to test across a number of tasks that may not be related to one another in order to assess a variety of capabilities. To this end, it may be cumbersome to have to list the set of tasks or add a new group name to each yaml of each individual task. To solve this, we can create a **group** yaml config. This is a config that contains the names of the tasks that should be included in a particular group. The config consists of two main keys: a `group` key which denotes the name of the group (as it would be called from the command line, e.g. `mmlu`) and a `task` key which is where we can list the tasks. The tasks listed in `task` are the task names that have been registered. A good example of a group yaml config can be found at [../lm_eval/tasks/mmlu/default/_mmlu.yaml]. See also the [New Task Guide](./new_task_guide.md) for a more in-depth and tutorial-esque explanation of how to write complex GroupConfigs. 
@@ -312,7 +312,7 @@ Groups are configured via the `GroupConfig` object. Below, we describe all field - **task** (`Union[str, list]`, defaults to `None`) - List of tasks that constitute the group. - **aggregate_metric_list** (`list`, defaults to `None`) - similar to `metric_list` in TaskConfigs, provide a list of configurations for metrics that should be aggregated across subtasks. Leaving empty will result in no aggregation being performed for this group. Keys for each list entry are: - `metric: str` - the name of the metric to aggregate over (all subtasks must report a metric holding this name.) - - `aggregation: str` - what aggregation function to apply to aggregate these per-subtask metrics. **currently, only `mean` is supported.** + - `aggregation: str` - what aggregation function to apply to aggregate these per-subtask metrics. **currently, only `mean` is supported.** - `weight_by_size: bool = True` whether to perform micro- averaging (`True`) or macro- (`False`) averaging of subtasks' accuracy scores when reporting the group's metric. MMLU, for example, averages over per-document accuracies (the *micro average*), resulting in the same accuracy as if one simply concatenated all 57 subjects into a single dataset and evaluated accuracy on that dataset. - `filter_list: Union[str, List[str]] = "none"` - what filter keys one should match on to aggregate results. For example, if trying to aggregate over the `exact_match` metric using `strict-match` filter for `bbh_cot_zeroshot`, then set this to be `filter_list: "strict-match"`. - **metadata** (`dict`, *optional*) - As with TaskConfigs, a field where extra config metadata can be passed. set the `num_fewshot` key within this to override the printed n_shot value in a results table for your group, for example. 
-- GitLab From a40fe42aae2a5232448495c7bb5bf76aef100309 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:12:55 +0000 Subject: [PATCH 22/32] remove cuda device assertion (#2680) --- lm_eval/models/vllm_causallms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py index 5718cb5d..9b86c319 100644 --- a/lm_eval/models/vllm_causallms.py +++ b/lm_eval/models/vllm_causallms.py @@ -75,7 +75,6 @@ class VLLM(TemplateLM): "Please install vllm via `pip install lm-eval[vllm]` or `pip install -e .[vllm]`" ) - assert "cuda" in device or device is None, "vLLM only supports CUDA" assert max_length is None or max_model_len is None, ( "Either max_length or max_model_len may be provided, but not both" ) -- GitLab From b7fccef58e144b2e9bba762996e0b1d208dc972f Mon Sep 17 00:00:00 2001 From: Michele Resta <79645321+m-resta@users.noreply.github.com> Date: Tue, 11 Feb 2025 10:46:49 +0100 Subject: [PATCH 23/32] Adding the Evalita-LLM benchmark (#2681) * feat: initial commit with templates for evalita evaluation * fix: change rule for generate_until * feat: modified yaml to use reduced version of NER test datasets * feat: added templates to use reduced dataset for summarization (fanpage and ilpost) * Add Six Prompts for Each Multiple-Choice Task * feat: modified fewshot split for textual entailment task * fix: new doc_to_target function for NER tasks * Update prompt * Add partition for few-shot evaluation * Add partition for few-shot evaluation * Add partition for few-shot evaluation * Add partition for few-shot evaluatio * Update prompt * Add partition for few-shot evaluation * Rename file Rename file from _evalita-mp_ner_adg_p1 .yaml to _evalita-mp_ner_adg_p1.yaml * Add partition for few-shot evaluation * Add partition for few-shot evaluation * Enhance lexical substitution management - Improve scorer calculation for better accuracy - Update model output 
postprocessing for clearer results - Add support for few-shot relation extraction task * Add F1 macro measure for the document dating task * Add F1-macro measure to evaluate document dating * Use the whole dataset * Small changes * Add the two prompts for the task of lexical substitution * Add few-shot split configuration * Add few-shot split configuration * Add function for handling few-shot learning setup * Fix prompt * Remove configuration file * Update dataset from test_same to test_cross for evaluations * Remove whitespace at end of prompt * Fix configuration error: corrected parameter name for the dataset used in few-shot * Fix: Check if results is not empty before processing in lexical substitution task * added the prompts and functions for correct NER and RE execution * Add accuracy measure * Add tasks for the EVALITA-LLM benchmark evaluation * Small changes Add the alias of the task name that will be printed in the final table results. * Updated the prompts to reflect changes made to the extended dataset for the Admission Test task * chore: cleaned templates before PR; feat: add configuration to run generation/ppl tasks. 
* fix: add information on Evalita-LLM for PR * fix: rename folders and files * fix: remove unused imports * chore: run pre-commit * chore: add task description --------- Co-authored-by: rzanoli Co-authored-by: Marco Madeddu --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/evalita_llm/README.md | 62 ++ lm_eval/tasks/evalita_llm/_at_template_yaml | 9 + lm_eval/tasks/evalita_llm/_evalita-mp.yaml | 18 + .../evalita_llm/_evalita-mp_at_task_p1.yaml | 18 + .../evalita_llm/_evalita-mp_at_task_p2.yaml | 18 + .../evalita_llm/_evalita-mp_at_task_p3.yaml | 16 + .../evalita_llm/_evalita-mp_at_task_p4.yaml | 15 + .../evalita_llm/_evalita-mp_at_task_p5.yaml | 18 + .../evalita_llm/_evalita-mp_at_task_p6.yaml | 18 + .../evalita_llm/_evalita-mp_at_tasks.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_faq_p1.yaml | 14 + .../tasks/evalita_llm/_evalita-mp_faq_p2.yaml | 14 + .../tasks/evalita_llm/_evalita-mp_faq_p3.yaml | 12 + .../tasks/evalita_llm/_evalita-mp_faq_p4.yaml | 12 + .../tasks/evalita_llm/_evalita-mp_faq_p5.yaml | 15 + .../tasks/evalita_llm/_evalita-mp_faq_p6.yaml | 14 + .../evalita_llm/_evalita-mp_faq_tasks.yaml | 9 + .../tasks/evalita_llm/_evalita-mp_gen.yaml | 12 + .../tasks/evalita_llm/_evalita-mp_hs_p1.yaml | 13 + .../tasks/evalita_llm/_evalita-mp_hs_p2.yaml | 13 + .../tasks/evalita_llm/_evalita-mp_hs_p3.yaml | 14 + .../tasks/evalita_llm/_evalita-mp_hs_p4.yaml | 14 + .../tasks/evalita_llm/_evalita-mp_hs_p5.yaml | 14 + .../tasks/evalita_llm/_evalita-mp_hs_p6.yaml | 14 + .../evalita_llm/_evalita-mp_hs_task.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_ls_p1.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_ls_p2.yaml | 6 + .../evalita_llm/_evalita-mp_ls_task.yaml | 10 + lm_eval/tasks/evalita_llm/_evalita-mp_mc.yaml | 14 + .../_evalita-mp_ner-adg_group.yaml | 9 + .../_evalita-mp_ner-adg_group_p1.yaml | 13 + .../_evalita-mp_ner-adg_group_p2.yaml | 13 + .../_evalita-mp_ner-fic_group.yaml | 9 + .../_evalita-mp_ner-fic_group_p1.yaml | 17 + .../_evalita-mp_ner-fic_group_p2.yaml | 13 + 
.../evalita_llm/_evalita-mp_ner-wn_group.yaml | 9 + .../_evalita-mp_ner-wn_group_p1.yaml | 13 + .../_evalita-mp_ner-wn_group_p2.yaml | 13 + .../evalita_llm/_evalita-mp_ner_adg.yaml | 7 + .../evalita_llm/_evalita-mp_ner_adg_p1.yaml | 14 + .../evalita_llm/_evalita-mp_ner_adg_p2.yaml | 14 + .../evalita_llm/_evalita-mp_ner_fic.yaml | 5 + .../evalita_llm/_evalita-mp_ner_fic_p1.yaml | 15 + .../evalita_llm/_evalita-mp_ner_fic_p2.yaml | 13 + .../evalita_llm/_evalita-mp_ner_group.yaml | 11 + .../tasks/evalita_llm/_evalita-mp_ner_wn.yaml | 7 + .../evalita_llm/_evalita-mp_ner_wn_p1.yaml | 13 + .../evalita_llm/_evalita-mp_ner_wn_p2.yaml | 13 + .../tasks/evalita_llm/_evalita-mp_re_p1.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_re_p2.yaml | 10 + .../evalita_llm/_evalita-mp_re_task.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_sa_p1.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_sa_p2.yaml | 10 + .../tasks/evalita_llm/_evalita-mp_sa_p3.yaml | 11 + .../tasks/evalita_llm/_evalita-mp_sa_p4.yaml | 11 + .../tasks/evalita_llm/_evalita-mp_sa_p5.yaml | 11 + .../tasks/evalita_llm/_evalita-mp_sa_p6.yaml | 11 + .../evalita_llm/_evalita-mp_sa_tasks.yaml | 9 + .../_evalita-mp_sum_fp-small_p1.yaml | 12 + .../_evalita-mp_sum_fp-small_p2.yaml | 12 + .../_evalita-mp_sum_fp-small_task.yaml | 9 + .../evalita_llm/_evalita-mp_sum_fp_p1.yaml | 10 + .../evalita_llm/_evalita-mp_sum_fp_p2.yaml | 10 + .../evalita_llm/_evalita-mp_sum_fp_task.yaml | 9 + .../tasks/evalita_llm/_evalita-mp_te_p1.yaml | 9 + .../tasks/evalita_llm/_evalita-mp_te_p2.yaml | 5 + .../tasks/evalita_llm/_evalita-mp_te_p3.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_te_p4.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_te_p5.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_te_p6.yaml | 6 + .../evalita_llm/_evalita-mp_te_tasks.yaml | 9 + .../tasks/evalita_llm/_evalita-mp_wic_p1.yaml | 5 + .../tasks/evalita_llm/_evalita-mp_wic_p2.yaml | 5 + .../tasks/evalita_llm/_evalita-mp_wic_p3.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_wic_p4.yaml | 6 + 
.../tasks/evalita_llm/_evalita-mp_wic_p5.yaml | 6 + .../tasks/evalita_llm/_evalita-mp_wic_p6.yaml | 6 + .../evalita_llm/_evalita-mp_wic_tasks.yaml | 9 + lm_eval/tasks/evalita_llm/_faq_template_yaml | 8 + lm_eval/tasks/evalita_llm/_hs_template_yaml | 9 + lm_eval/tasks/evalita_llm/_ls_template_yaml | 16 + lm_eval/tasks/evalita_llm/_ner_template_yaml | 14 + lm_eval/tasks/evalita_llm/_re_template_yaml | 14 + .../tasks/evalita_llm/_sa_template_v2_yaml | 9 + lm_eval/tasks/evalita_llm/_sa_template_yaml | 9 + .../evalita_llm/_sum_template_fp-small_yaml | 10 + .../tasks/evalita_llm/_sum_template_fp_yaml | 9 + lm_eval/tasks/evalita_llm/_sum_template_yaml | 11 + lm_eval/tasks/evalita_llm/_te_template_yaml | 13 + lm_eval/tasks/evalita_llm/_wic_template_yaml | 14 + lm_eval/tasks/evalita_llm/metrics.py | 165 +++++ lm_eval/tasks/evalita_llm/single_prompt.zip | Bin 0 -> 13335 bytes lm_eval/tasks/evalita_llm/utils.py | 576 ++++++++++++++++++ 94 files changed, 1786 insertions(+) create mode 100644 lm_eval/tasks/evalita_llm/README.md create mode 100644 lm_eval/tasks/evalita_llm/_at_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p4.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p6.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_at_tasks.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_p4.yaml create mode 100644 
lm_eval/tasks/evalita_llm/_evalita-mp_faq_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_p6.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_faq_tasks.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_gen.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p4.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_p6.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_hs_task.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ls_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ls_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ls_task.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_mc.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml create 
mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_group.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_re_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_re_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_re_task.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p4.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_p6.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sa_tasks.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_task.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_task.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p4.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml create mode 100644 
lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml create mode 100644 lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml create mode 100644 lm_eval/tasks/evalita_llm/_faq_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_hs_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_ls_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_ner_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_re_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_sa_template_v2_yaml create mode 100644 lm_eval/tasks/evalita_llm/_sa_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml create mode 100644 lm_eval/tasks/evalita_llm/_sum_template_fp_yaml create mode 100644 lm_eval/tasks/evalita_llm/_sum_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_te_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/_wic_template_yaml create mode 100644 lm_eval/tasks/evalita_llm/metrics.py create mode 100644 lm_eval/tasks/evalita_llm/single_prompt.zip create mode 100644 lm_eval/tasks/evalita_llm/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 7a53dce9..750dd181 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -42,6 +42,7 @@ | [eus_proficiency](eus_proficiency/README.md) | Tasks designed to test proficiency in the Basque language across various topics. | Basque | | [eus_reading](eus_reading/README.md) | Reading comprehension tasks specifically designed for the Basque language. 
| Basque | | [eus_trivia](eus_trivia/README.md) | Trivia and knowledge testing tasks in the Basque language. | Basque | +| [evalita-LLM](evalita_llm/README.md) | A native Italian benchmark with diverse task formats and multiple prompts. | Italian | | [fda](fda/README.md) | Tasks for extracting key-value pairs from FDA documents to test information extraction. | English | | [fld](fld/README.md) | Tasks involving free-form and directed dialogue understanding. | English | | [french_bench](french_bench/README.md) | Set of tasks designed to assess language model performance in French. | French | diff --git a/lm_eval/tasks/evalita_llm/README.md b/lm_eval/tasks/evalita_llm/README.md new file mode 100644 index 00000000..a2d14889 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/README.md @@ -0,0 +1,62 @@ +# Evalita-LLM + +### Paper + +Evalita-LLM is a new benchmark designed to evaluate Large Language +Models (LLMs) on Italian tasks. The distinguishing and innovative features of +Evalita-LLM are the following: (i) all tasks are native Italian, avoiding issues of +translating from Italian and potential cultural biases; (ii) in addition to well established multiple-choice tasks, the benchmark includes generative tasks, enabling more natural interaction with LLMs; (iii) all tasks are evaluated against multiple prompts, this way mitigating the model sensitivity to specific prompts and allowing a fairer and objective evaluation. + +### Citation + +```bibtex +@misc{magnini2025evalitallmbenchmarkinglargelanguage, + title={Evalita-LLM: Benchmarking Large Language Models on Italian}, + author={Bernardo Magnini and Roberto Zanoli and Michele Resta and Martin Cimmino and Paolo Albano and Marco Madeddu and Viviana Patti}, + year={2025}, + eprint={2502.02289}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2502.02289}, +} +``` + +### Groups + +- `evalita-mp`: All tasks (perplexity and non-perplexity based). +- `evalita-mp_gen`: Only generative tasks.
+- `evalita-mp_mc`: Only perplexity-based tasks. + +#### Tasks + +The following Evalita-LLM tasks can also be evaluated in isolation: + - `evalita-mp_te`: Textual Entailment + - `evalita-mp_sa`: Sentiment Analysis + - `evalita-mp_wic`: Word in Context + - `evalita-mp_hs`: Hate Speech Detection + - `evalita-mp_at`: Admission Tests + - `evalita-mp_faq`: FAQ + - `evalita-mp_sum_fp`: Summarization + - `evalita-mp_ls`: Lexical Substitution + - `evalita-mp_ner_group`: Named Entity Recognition + - `evalita-mp_re`: Relation Extraction + + +### Usage + +```bash + +lm_eval --model hf --model_args pretrained=meta-llama/Llama-2-7b-hf --tasks evalita-mp --device cuda:0 --batch_size auto +``` + +### Checklist + +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [x] If yes, does the original paper provide a reference implementation? + * [x] Yes, original implementation contributed by author of the benchmark + +If other tasks on this dataset are already supported: +* [x] Is the "Main" variant of this task clearly denoted? +* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [x] Have you noted which, if any, published evaluation setups are matched by this variant? 
diff --git a/lm_eval/tasks/evalita_llm/_at_template_yaml b/lm_eval/tasks/evalita_llm/_at_template_yaml new file mode 100644 index 00000000..8c8d7a9c --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_at_template_yaml @@ -0,0 +1,9 @@ +dataset_path: evalitahf/admission_test +output_type: multiple_choice +test_split: test +fewshot_split: dev +validation_split: test +doc_to_target: Correct +doc_to_choice: ["A", "B", "C", "D", "E"] +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp.yaml new file mode 100644 index 00000000..c212e77f --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp.yaml @@ -0,0 +1,18 @@ +group: evalita-mp +group_alias: Evalita-LLM +task: + - evalita-mp_te + - evalita-mp_sa + - evalita-mp_wic + - evalita-mp_hs + - evalita-mp_at + - evalita-mp_faq + - evalita-mp_sum_fp + - evalita-mp_ls + - evalita-mp_ner_group + - evalita-mp_re +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p1.yaml new file mode 100644 index 00000000..1655213e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p1.yaml @@ -0,0 +1,18 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" +#doc_to_text: "Dato il seguente caso clinico: '{{background}}' qual è la risposta corretta alla domanda: '{{domanda}}'?" +doc_to_text: "Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?" 
+doc_to_choice: "{{[A,B,C,D,E]}}" +doc_to_target: "{{ A if Correct == 'A' else B if Correct == 'B' else C if Correct == 'C' else D if Correct == 'D' else E}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p2.yaml new file mode 100644 index 00000000..1b0c16fc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p2.yaml @@ -0,0 +1,18 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-2 +task_alias: prompt-2 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" +#doc_to_text: "Devi risolvere un compito di risposte a domande. Dato il seguente caso clinico: '{{background}}' qual è la risposta corretta alla domanda: '{{domanda}}'?" +doc_to_text: "Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?" 
+doc_to_choice: "{{[A,B,C,D,E]}}" +doc_to_target: "{{ A if Correct == 'A' else B if Correct == 'B' else C if Correct == 'C' else D if Correct == 'D' else E}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p3.yaml new file mode 100644 index 00000000..767a75a0 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p3.yaml @@ -0,0 +1,16 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-3 +task_alias: prompt-3 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" +#doc_to_text: "Dato il seguente caso clinico: '{{background}}', qual è la risposta corretta alla domanda: '{{domanda}}'?\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nE: {{E}}\nRisposta:" +doc_to_text: "Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nE: {{E}}\nRisposta:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p4.yaml new file mode 100644 index 00000000..a7b29f81 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p4.yaml @@ -0,0 +1,15 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-4 +task_alias: prompt-4 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" +doc_to_text: "Devi risolvere un compito 
a scelta multipla. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nE: {{E}}\nRisposta:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p5.yaml new file mode 100644 index 00000000..fead4471 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p5.yaml @@ -0,0 +1,18 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-5 +task_alias: prompt-5 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" +#doc_to_text: "Dato il seguente caso clinico: '{{background}}'. La risposta corretta alla domanda: '{{domanda}}' è:" +doc_to_text: "Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è:" +doc_to_choice: "{{[A,B,C,D,E]}}" +doc_to_target: "{{ A if Correct == 'A' else B if Correct == 'B' else C if Correct == 'C' else D if Correct == 'D' else E}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p6.yaml new file mode 100644 index 00000000..9fee15e0 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_task_p6.yaml @@ -0,0 +1,18 @@ +tag: evalita-mp_at_tasks +include: _at_template_yaml +task: evalita-mp_at_prompt-6 +task_alias: prompt-6 +#doc_to_text: "Rispondi alla domanda a scelta multipla considerando le informazioni del testo seguente.\nTesto: {{background}}\nDomanda: {{domanda}}\nOpzioni: A: {{A}} B: {{B}} C: {{C}} D: {{D}}" 
+#doc_to_text: "Devi risolvere un compito di risposte a domande. Dato il seguente caso clinico: '{{background}}'. La risposta corretta alla domanda: '{{domanda}}' è:" +doc_to_text: "Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è:" +doc_to_choice: "{{[A,B,C,D,E]}}" +doc_to_target: "{{ A if Correct == 'A' else B if Correct == 'B' else C if Correct == 'C' else D if Correct == 'D' else E}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true + - metric: acc_norm + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_at_tasks.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_at_tasks.yaml new file mode 100644 index 00000000..b04e6420 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_at_tasks.yaml @@ -0,0 +1,10 @@ +group: evalita-mp_at +group_alias: admission-test +task: + - evalita-mp_at_tasks # Each of the tasks has to have a matching tag in its own yaml file +aggregate_metric_list: + - metric: acc + weight_by_size: True + aggregation: mean +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p1.yaml new file mode 100644 index 00000000..fb7aaa31 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p1.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Data la seguente domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +doc_to_text: "Rispondi alla seguente domanda: '{{question}}'" +doc_to_choice: "{{[A,B,C,D]}}" +doc_to_target: "{{ A if correct_answer == 'A' else B if correct_answer == 'B' else C if correct_answer == 'C' else D}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git 
a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p2.yaml new file mode 100644 index 00000000..7c873df8 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p2.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-2 +task_alias: prompt-2 +#doc_to_text: "Data la seguente domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +doc_to_text: "Devi risolvere un compito di risposte a domande. Rispondi alla seguente domanda: '{{question}}'" +doc_to_choice: "{{[A,B,C,D]}}" +doc_to_target: "{{ A if correct_answer == 'A' else B if correct_answer == 'B' else C if correct_answer == 'C' else D}}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p3.yaml new file mode 100644 index 00000000..bac97f21 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p3.yaml @@ -0,0 +1,12 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-3 +task_alias: prompt-3 +#doc_to_text: "Data la seguente domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +doc_to_text: "Rispondi alla seguente domanda: '{{question}}'\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nRisposta:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p4.yaml new file mode 100644 index 00000000..43e45d19 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p4.yaml @@ -0,0 +1,12 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-4 +task_alias: prompt-4 +#doc_to_text: "Data la seguente 
domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +doc_to_text: "Devi risolvere un compito a scelta multipla. Rispondi alla seguente domanda: '{{question}}'\nA: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}}\nRisposta:" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p5.yaml new file mode 100644 index 00000000..d01cb3c8 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p5.yaml @@ -0,0 +1,15 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-5 +task_alias: prompt-5 +#doc_to_text: "Data la seguente domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +#doc_to_text: "La risposta alla domanda: '{{question}}' è:" +doc_to_text: "La risposta alla domanda: '{{question}}' è:" +doc_to_choice: "{{[A,B,C,D]}}" +doc_to_target: "{{ A if correct_answer == 'A' else B if correct_answer == 'B' else C if correct_answer == 'C' else D }}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p6.yaml new file mode 100644 index 00000000..e886e36b --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_p6.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_faq_tasks +include: _faq_template_yaml +task: evalita-mp_faq_prompt-6 +task_alias: prompt-6 +#doc_to_text: "Data la seguente domanda {{question}}, individua la risposta corretta tra le seguenti opzioni:\n A: {{A}}\nB: {{B}}\nC: {{C}}\nD: {{D}} Risposta:" +doc_to_text: "Devi risolvere un compito di risposte a domande. 
La risposta alla domanda: '{{question}}' è:" +doc_to_choice: "{{[A,B,C,D]}}" +doc_to_target: "{{ A if correct_answer == 'A' else B if correct_answer == 'B' else C if correct_answer == 'C' else D }}" +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_faq_tasks.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_tasks.yaml new file mode 100644 index 00000000..ffc0089c --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_faq_tasks.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_faq +group_alias: faq +task: + - evalita-mp_faq_tasks # Each of the tasks has to have a matching tag in its own yaml file +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_gen.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_gen.yaml new file mode 100644 index 00000000..7f93e186 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_gen.yaml @@ -0,0 +1,12 @@ +group: evalita-mp_gen +group_alias: Evalita-LLM - Generative +task: + - evalita-mp_sum_fp + - evalita-mp_ls + - evalita-mp_ner_group + - evalita-mp_re +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p1.yaml new file mode 100644 index 00000000..837071e6 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p1.yaml @@ -0,0 +1,13 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. Testo:\n{{full_text}} Risposta:" +doc_to_text: "C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?" 
+metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p2.yaml new file mode 100644 index 00000000..0c04b216 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p2.yaml @@ -0,0 +1,13 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-2 +task_alias: prompt-2 +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. Testo:\n{{full_text}} Risposta:" +doc_to_text: "Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?" +metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p3.yaml new file mode 100644 index 00000000..d3efe05e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p3.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-3 +task_alias: prompt-3 +doc_to_choice: ["B", "A"] +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. 
Testo:\n{{full_text}} Risposta:" +doc_to_text: "C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\nA: Vero\nB: Falso\nRisposta:" +metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p4.yaml new file mode 100644 index 00000000..639de50e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p4.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-4 +task_alias: prompt-4 +doc_to_choice: ["B", "A"] +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. Testo:\n{{full_text}} Risposta:" +doc_to_text: "Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\nA: Sì\nB: No\nRisposta:" +metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p5.yaml new file mode 100644 index 00000000..e99fb191 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p5.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-5 +task_alias: prompt-5 +doc_to_choice: ["non contiene incitamento all'odio", "contiene incitamento all'odio"] +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. 
Testo:\n{{full_text}} Risposta:" +doc_to_text: "Il tweet: '{{full_text}}'" +metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p6.yaml new file mode 100644 index 00000000..4f37e9bc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_p6.yaml @@ -0,0 +1,14 @@ +tag: evalita-mp_hs_tasks +include: _hs_template_yaml +task: evalita-mp_hs_prompt-6 +task_alias: prompt-6 +doc_to_choice: ["non contiene incitamento all'odio", "contiene incitamento all'odio"] +#doc_to_text: "Dato il seguente testo, rispondi Vero se il testo contiene hate speech, altrimenti rispondi Falso. Testo:\n{{full_text}} Risposta:" +doc_to_text: "Devi svolgere un compito di identificazione di incitamento all'odio. Il tweet: '{{full_text}}'" +metric_list: + - metric: f1 + higher_is_better: true + average: macro + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_hs_task.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_task.yaml new file mode 100644 index 00000000..5455c880 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_hs_task.yaml @@ -0,0 +1,10 @@ +group: evalita-mp_hs +group_alias: hate-speech-detection +task: + - evalita-mp_hs_tasks +aggregate_metric_list: + - metric: f1 + weight_by_size: True + +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p1.yaml new file mode 100644 index 00000000..92b6513e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p1.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_ls_tasks +include: _ls_template_yaml +task: evalita-mp_ls_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Sostituisci la parola tra i tag con sinonimi appropriati per il contesto. Separa i sinonimi con virgole. 
Testo:\n{{context}}" +doc_to_text: "Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p2.yaml new file mode 100644 index 00000000..2aeaddf7 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_p2.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_ls_tasks +include: _ls_template_yaml +task: evalita-mp_ls_prompt-2 +task_alias: prompt-2 +#doc_to_text: "Sostituisci la parola tra i tag con sinonimi appropriati per il contesto. Separa i sinonimi con virgole. Testo:\n{{context}}" +doc_to_text: "Devi risolvere un compito di sostituzione lessicale. Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. 
Risposta:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ls_task.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_task.yaml new file mode 100644 index 00000000..1c67a087 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ls_task.yaml @@ -0,0 +1,10 @@ +group: evalita-mp_ls +group_alias: lexical-substitution +task: +- evalita-mp_ls_tasks +aggregate_metric_list: + - metric: f1 + weight_by_size: True + +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_mc.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_mc.yaml new file mode 100644 index 00000000..098ed1c5 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_mc.yaml @@ -0,0 +1,14 @@ +group: evalita-mp_mc +group_alias: Evalita-LLM - PPL-based +task: + - evalita-mp_te + - evalita-mp_sa + - evalita-mp_wic + - evalita-mp_hs + - evalita-mp_at + - evalita-mp_faq +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group.yaml new file mode 100644 index 00000000..0bfc01d8 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_ner_adg_group +group_alias: 'evalita NER: ADG' +task: + - evalita-mp_ner-v2_tasks_adg +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p1.yaml new file mode 100644 index 00000000..72bc56a9 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p1.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: adg +test_split: reduced_test +fewshot_split: dev +task_alias: prompt-1 +tag: evalita-mp_ner-v2_tasks_adg +task: evalita-mp_ner-v2_adg_p1 + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] 
(location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. +Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p2.yaml new file mode 100644 index 00000000..9066b707 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-adg_group_p2.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: adg +test_split: reduced_test +fewshot_split: dev +task_alias: prompt-2 +tag: evalita-mp_ner-v2_tasks_adg +task: evalita-mp_ner-v2_adg_p2 + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] (location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. 
+Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group.yaml new file mode 100644 index 00000000..da789e78 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_ner_fic_group +group_alias: 'evalita NER: FIC' +task: + - evalita-mp_ner-v2_tasks_fic +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p1.yaml new file mode 100644 index 00000000..52accfb4 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p1.yaml @@ -0,0 +1,17 @@ +include: _ner_template_yaml +dataset_name: fic +test_split: reduced_test +test_split: dev +fewshot_split: dev +task_alias: prompt-1 +tag: evalita-mp_ner-v2_tasks_fic +task: evalita-mp_ner-v2_fic_p1 + +# +doc_to_target: !function utils.filter_per_entities_from_lines +doc_to_target: entities + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] (location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. 
Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p2.yaml new file mode 100644 index 00000000..6b8a22c9 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-fic_group_p2.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: fic +test_split: reduced_test +fewshot_split: dev +task_alias: prompt-2 +tag: evalita-mp_ner-v2_tasks_fic +task: evalita-mp_ner-v2_fic_p2 + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] (location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. 
+Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group.yaml new file mode 100644 index 00000000..a8b18b92 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_ner_wn_group +group_alias: 'evalita NER: WN' +task: + - evalita-mp_ner-v2_tasks_wn +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p1.yaml new file mode 100644 index 00000000..ab353acc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p1.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: wn +test_split: reduced_test +fewshot_split: dev +task_alias: prompt-1 +tag: evalita-mp_ner-v2_tasks_wn +task: evalita-mp_ner-v2_wn_p1 + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] (location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. 
+Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p2.yaml new file mode 100644 index 00000000..becc3d85 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner-wn_group_p2.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: wn +test_split: reduced_test +fewshot_split: dev +task_alias: prompt-2 +tag: evalita-mp_ner-v2_tasks_wn +task: evalita-mp_ner-v2_wn_p2 + +# English +#doc_to_text: "Given the following text, write the entity mentions in the text, indicating their type: [PER] (person), [LOC] (location), [ORG] (organization). Respond with the following format: Entity$Type. Separate each entity-type pair with the '%' character. Text: {{text}}" +# Italian +doc_to_text: "Dato il seguente testo, scrivi le menzioni di entità nel testo, indicandone il tipo: PER (persona), LOC (luogo), ORG (organizzazione). Rispondi con il seguente formato: Entità$Tipo%Entità$Tipo. Separa ogni coppia entità-tipo con il carattere '%' ad esempio: Entità_2$Tipo%Entità_2$Tipo. In caso non ci siano entita' rispondi '&&NOENT&&'. 
+Testo: {{text}}" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml new file mode 100644 index 00000000..02362087 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg.yaml @@ -0,0 +1,7 @@ +group: evalita-mp_ner_tasks_adg +group_alias: evalita NER adg +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p1.yaml new file mode 100644 index 00000000..16ba0b7a --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p1.yaml @@ -0,0 +1,14 @@ +include: _ner_template_yaml +dataset_name: adg +test_split: reduced_test +fewshot_split: trial + +task_alias: ADG prompt-1 +tag: evalita-mp_ner_tasks_adg +task: evalita-mp_ner_adg_p1 + + +#p1 +doc_to_text: "Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. +Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p2.yaml new file mode 100644 index 00000000..fb3d852c --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_adg_p2.yaml @@ -0,0 +1,14 @@ +include: _ner_template_yaml +dataset_name: adg +test_split: reduced_test +fewshot_split: trial + +task_alias: ADG prompt-2 +tag: evalita-mp_ner_tasks_adg +task: evalita-mp_ner_adg_p2 + + +#p8 +doc_to_text: "Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. 
+Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml new file mode 100644 index 00000000..9e338724 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic.yaml @@ -0,0 +1,5 @@ + +group: evalita-mp_ner_tasks_fic +group_alias: evalita NER fic + +task_alias: NER fic diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p1.yaml new file mode 100644 index 00000000..248b150d --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p1.yaml @@ -0,0 +1,15 @@ +include: _ner_template_yaml +dataset_name: fic +test_split: reduced_test +fewshot_split: trial + +task_alias: FIC prompt-1 +tag: evalita-mp_ner_tasks_fic +task: evalita-mp_ner_fic_p1 + + + +#p1 +doc_to_text: "Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. +Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p2.yaml new file mode 100644 index 00000000..f7145415 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_fic_p2.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: fic +test_split: reduced_test +fewshot_split: trial + +task_alias: FIC prompt-2 +tag: evalita-mp_ner_tasks_fic +task: evalita-mp_ner_fic_p2 + +#p8 +doc_to_text: "Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. 
+Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_group.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_group.yaml new file mode 100644 index 00000000..adc8e485 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_group.yaml @@ -0,0 +1,11 @@ +group: evalita-mp_ner_group +group_alias: evalita NER +task: + - evalita-mp_ner_tasks_fic + - evalita-mp_ner_tasks_adg + - evalita-mp_ner_tasks_wn +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml new file mode 100644 index 00000000..3b1cd45f --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn.yaml @@ -0,0 +1,7 @@ +group: evalita-mp_ner_tasks_wn +group_alias: evalita NER wn +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p1.yaml new file mode 100644 index 00000000..a096b564 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p1.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: wn +test_split: reduced_test +fewshot_split: trial + +task_alias: WN prompt-1 +tag: evalita-mp_ner_tasks_wn +task: evalita-mp_ner_wn_p1 + + +doc_to_text: "Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. 
+Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p2.yaml new file mode 100644 index 00000000..ff481e7d --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_ner_wn_p2.yaml @@ -0,0 +1,13 @@ +include: _ner_template_yaml +dataset_name: wn +test_split: reduced_test +fewshot_split: trial + +task_alias: WN prompt-2 +tag: evalita-mp_ner_tasks_wn +task: evalita-mp_ner_wn_p2 + + +doc_to_text: "Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. +Testo: '{{text}}' +Entità:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_re_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_re_p1.yaml new file mode 100644 index 00000000..9755dc97 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_re_p1.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_re_tasks +include: _re_template_yaml +task: evalita-mp_re_prompt-1 +fewshot_split: dev +task_alias: prompt-1 + +#p4 +doc_to_text: "Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'. +Testo: '{{text}}' +Relazioni:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_re_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_re_p2.yaml new file mode 100644 index 00000000..7ea25f7e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_re_p2.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_re_tasks +include: _re_template_yaml +fewshot_split: dev +task: evalita-mp_re_prompt-2 +task_alias: prompt-2 + +#p5 +doc_to_text: "Devi svolgere un compito di estrazione di relazioni da documenti medici. 
Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'. +Testo: '{{text}}' +Relazioni:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_re_task.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_re_task.yaml new file mode 100644 index 00000000..5b629da4 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_re_task.yaml @@ -0,0 +1,10 @@ +group: evalita-mp_re +group_alias: relation-extraction +task: +- evalita-mp_re_tasks +aggregate_metric_list: + - metric: f1 + weight_by_size: True + +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p1.yaml new file mode 100644 index 00000000..01d7cee6 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p1.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +doc_to_text: "Qual è il sentiment espresso nel seguente tweet: '{{text}}'?" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p2.yaml new file mode 100644 index 00000000..9188f614 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p2.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-2 +task_alias: prompt-2 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +doc_to_text: "Devi svolgere un compito di analisi del sentiment. 
Qual è il sentiment espresso nel seguente tweet: '{{text}}'?" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p3.yaml new file mode 100644 index 00000000..cf61e9c4 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p3.yaml @@ -0,0 +1,11 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-3 +task_alias: prompt-3 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_text: "Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\nA: Positivo\nB: Negativo\nC: Neutro\nD: Misto\nRisposta:" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p4.yaml new file mode 100644 index 00000000..72c956d1 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p4.yaml @@ -0,0 +1,11 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-4 +task_alias: prompt-4 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +doc_to_choice: ["A", "B", "C", "D"] +doc_to_text: "Devi svolgere un compito di analisi del sentiment. 
Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\nA: Positivo\nB: Negativo\nC: Neutro\nD: Misto\nRisposta:" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p5.yaml new file mode 100644 index 00000000..cc58565c --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p5.yaml @@ -0,0 +1,11 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-5 +task_alias: prompt-5 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +#doc_to_choice: ["A", "B", "C", "D"] +doc_to_text: "Il seguente tweet: '{{text}}' esprime un sentiment" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p6.yaml new file mode 100644 index 00000000..6904835e --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_p6.yaml @@ -0,0 +1,11 @@ +tag: evalita-mp_sa_tasks +include: _sa_template_yaml +task: evalita-mp_sa_prompt-6 +task_alias: prompt-6 +#doc_to_text: "Opinione: '{{text}}' Determinare la sentiment dell'opinione data. Possibili risposte: A – neutrale B – negativo C – positivo D - misto Risposta:" +#doc_to_choice: ["A", "B", "C", "D"] +doc_to_text: "Devi svolgere un compito di analisi del sentiment. 
Il seguente tweet: '{{text}}' esprime un sentiment" +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_sa diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sa_tasks.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_tasks.yaml new file mode 100644 index 00000000..1f6b883c --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sa_tasks.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_sa +group_alias: sentiment-analysis +task: + - evalita-mp_sa_tasks # Each of the tasks has to have a matching tag in its own yaml file +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p1.yaml new file mode 100644 index 00000000..28975c31 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p1.yaml @@ -0,0 +1,12 @@ +tag: evalita-mp_sum_fp-small_tasks +include: _sum_template_fp-small_yaml +task: evalita-sp_sum_task_fp-small_p1 +task_alias: prompt-1 +#doc_to_text: > +# "Crea un sommario del seguente testo. Testo: {{source}}\nSommario: " +doc_to_text: "Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto:" +process_results: !function utils.process_results_sum +metric_list: + - metric: rouge1 + higher_is_better: true + aggregation: mean diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p2.yaml new file mode 100644 index 00000000..a705af96 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_p2.yaml @@ -0,0 +1,12 @@ +tag: evalita-mp_sum_fp-small_tasks +include: _sum_template_fp-small_yaml +task: evalita-sp_sum_task_fp-small_p2 +task_alias: prompt-2 +#doc_to_text: > +# "Crea un sommario del seguente testo. Testo: {{source}}\nSommario: " +doc_to_text: "Devi risolvere un compito di sintesi automatica del testo. 
Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto:" +process_results: !function utils.process_results_sum +metric_list: + - metric: rouge1 + higher_is_better: true + aggregation: mean diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_task.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_task.yaml new file mode 100644 index 00000000..c0c339f8 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp-small_task.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_sum_fp +group_alias: summarization-fanpage +task: +- evalita-mp_sum_fp-small_tasks +aggregate_metric_list: + - metric: rouge1 + weight_by_size: True +metadata: + version: 0.0 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p1.yaml new file mode 100644 index 00000000..4bc412f7 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p1.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_sum_fp_tasks +include: _sum_template_fp_yaml +task: evalita-sp_sum_task_fp_p1 +task_alias: prompt-1 +doc_to_text: "Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto:" +process_results: !function utils.process_results_sum +metric_list: + - metric: rouge1 + higher_is_better: true + aggregation: mean diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p2.yaml new file mode 100644 index 00000000..4d409ccc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_p2.yaml @@ -0,0 +1,10 @@ +tag: evalita-mp_sum_fp_tasks +include: _sum_template_fp_yaml +task: evalita-sp_sum_task_fp_p2 +task_alias: prompt-2 +doc_to_text: "Devi risolvere un compito di sintesi automatica del testo. 
Riassumi il seguente articolo di giornale: '{{source}}'\nRiassunto:" +process_results: !function utils.process_results_sum +metric_list: + - metric: rouge1 + higher_is_better: true + aggregation: mean diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_task.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_task.yaml new file mode 100644 index 00000000..147fe567 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_sum_fp_task.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_sum_fp +group_alias: summarization-fanpage +task: +- evalita-mp_sum_fp_tasks +aggregate_metric_list: + - metric: rouge1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p1.yaml new file mode 100644 index 00000000..e9841a45 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p1.yaml @@ -0,0 +1,9 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-1 +task_alias: prompt-1 +#doc_to_text: "Task di Text Entailment. Rispondi Vero o Falso in base alla correttezza dell'ipotesi rispetto al testo.\nTesto:{{text1}}\nIpotesi: {{text2}}\nRisposta:" +doc_to_text: "La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?" +#metric_list: +# - metric: acc +# higher_is_better: true diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p2.yaml new file mode 100644 index 00000000..932fc185 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p2.yaml @@ -0,0 +1,5 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-2 +task_alias: prompt-2 +doc_to_text: "Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?" 
diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p3.yaml new file mode 100644 index 00000000..91e0c667 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p3.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-3 +task_alias: prompt-3 +doc_to_choice: ["A", "B"] +doc_to_text: "La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\nA: Sì\nB: No\nRisposta:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p4.yaml new file mode 100644 index 00000000..8ffc087d --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p4.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-4 +task_alias: prompt-4 +doc_to_choice: ["A", "B"] +doc_to_text: "Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\nA: Sì\nB: No\nRisposta:" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p5.yaml new file mode 100644 index 00000000..2cee2a12 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p5.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-5 +task_alias: prompt-5 +doc_to_choice: ["La frase 1 implica logicamente che la frase 2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] +doc_to_text: "Frase 1: '{{text1}}' Frase 2: '{{text2}}'" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml new file mode 100644 index 00000000..e06bbefe --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_p6.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_te_tasks +include: _te_template_yaml +task: evalita-mp_te_prompt-6 +task_alias: prompt-6 +doc_to_choice: ["La frase 1 implica logicamente che la frase 
2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] +doc_to_text: "Devi risolvere un compito di inferenza semantica. Frase 1: '{{text1}}' Frase 2: '{{text2}}'" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml new file mode 100644 index 00000000..8c6d53fc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_te_tasks.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_te +group_alias: text-entailment +task: + - evalita-mp_te_tasks # this has to match the tag in the task yaml file +aggregate_metric_list: + - metric: acc + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml new file mode 100644 index 00000000..5a8c47fa --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p1.yaml @@ -0,0 +1,5 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-1 +task_alias: prompt-1 +include: _wic_template_yaml +doc_to_text: "La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?" diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml new file mode 100644 index 00000000..f990ee78 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p2.yaml @@ -0,0 +1,5 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-2 +task_alias: prompt-2 +include: _wic_template_yaml +doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?" 
diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml new file mode 100644 index 00000000..20267adc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p3.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-3 +task_alias: prompt-3 +include: _wic_template_yaml +doc_to_text: "La parola '{{sentence1[start1:end1]}}' nella frase '{{sentence1}}' ha lo stesso significato della parola '{{sentence2[start2:end2]}}' nella frase '{{sentence2}}'?\nA: Sì\nB: No\nRisposta:" +doc_to_choice: ["B", "A"] diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml new file mode 100644 index 00000000..46086de3 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p4.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-4 +task_alias: prompt-4 +include: _wic_template_yaml +doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. 
La parola '{{sentence1[start1:end1]}}' nella frase '{{sentence1}}' ha lo stesso significato della parola '{{sentence2[start2:end2]}}' nella frase '{{sentence2}}'?\nA: Sì\nB: No\nRisposta:" +doc_to_choice: ["B", "A"] diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml new file mode 100644 index 00000000..3a8e883a --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p5.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-5 +task_alias: prompt-5 +include: _wic_template_yaml +doc_to_text: "La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'" +doc_to_choice: ["non hanno lo stesso significato", "hanno lo stesso significato"] diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml new file mode 100644 index 00000000..56ddf9d9 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_p6.yaml @@ -0,0 +1,6 @@ +tag: evalita-mp_wic_tasks +task: evalita-mp_wic_prompt-6 +task_alias: prompt-6 +include: _wic_template_yaml +doc_to_text: "Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. 
La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'" +doc_to_choice: ["non hanno lo stesso significato", "hanno lo stesso significato"] diff --git a/lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml new file mode 100644 index 00000000..5825046b --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_evalita-mp_wic_tasks.yaml @@ -0,0 +1,9 @@ +group: evalita-mp_wic +group_alias: word-in-context +task: + - evalita-mp_wic_tasks # this has to match the tag in the task yaml file +aggregate_metric_list: + - metric: f1 + weight_by_size: True +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_faq_template_yaml b/lm_eval/tasks/evalita_llm/_faq_template_yaml new file mode 100644 index 00000000..5620b948 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_faq_template_yaml @@ -0,0 +1,8 @@ +dataset_path: evalitahf/faq +test_split: test_1 +fewshot_split: dev_1 +doc_to_target: !function utils.faq_doc_to_target +doc_to_choice: ["A", "B", "C", "D"] +output_type: multiple_choice +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_hs_template_yaml b/lm_eval/tasks/evalita_llm/_hs_template_yaml new file mode 100644 index 00000000..c224f4e3 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_hs_template_yaml @@ -0,0 +1,9 @@ +dataset_path: evalitahf/hatespeech_detection +output_type: multiple_choice +test_split: test_all +fewshot_split: dev +validation_split: dev +doc_to_target: hs # 0 = Falso, 1 = Vero +doc_to_choice: ["Falso", "Vero"] +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_ls_template_yaml b/lm_eval/tasks/evalita_llm/_ls_template_yaml new file mode 100644 index 00000000..a5df2eb1 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_ls_template_yaml @@ -0,0 +1,16 @@ +dataset_path: evalitahf/lexical_substitution +test_split: test +validation_split: dev +fewshot_split: dev +output_type: generate_until +generation_kwargs: 
+ until: + - "" +doc_to_target: !function utils.ls_doc_to_target +process_results: !function utils.ls_process_results +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_ls +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_ner_template_yaml b/lm_eval/tasks/evalita_llm/_ner_template_yaml new file mode 100644 index 00000000..77dd0c3b --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_ner_template_yaml @@ -0,0 +1,14 @@ +dataset_path: evalitahf/entity_recognition +output_type: generate_until +generation_kwargs: + until: + - "" + - "\n" +doc_to_target: !function utils.ner_doc_to_target +process_results: !function utils.ner_process_results +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_ner +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_re_template_yaml b/lm_eval/tasks/evalita_llm/_re_template_yaml new file mode 100644 index 00000000..9621af12 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_re_template_yaml @@ -0,0 +1,14 @@ +dataset_path: evalitahf/relation_extraction +test_split: test +output_type: generate_until +generation_kwargs: + until: + - "" +doc_to_target: !function utils.re_doc_to_target +process_results: !function utils.rel_process_results_v3 +metric_list: + - metric: f1 + higher_is_better: True + aggregation: !function metrics._aggreg_rel +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_sa_template_v2_yaml b/lm_eval/tasks/evalita_llm/_sa_template_v2_yaml new file mode 100644 index 00000000..b9fc6460 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_sa_template_v2_yaml @@ -0,0 +1,9 @@ +dataset_path: evalitahf/sentiment_analysis +output_type: multiple_choice +test_split: test +fewshot_split: train +validation_split: test +doc_to_target: !function utils.sa_doc_to_target_v2 +doc_to_choice: ["positivo", "negativo", "neutrale", "misto"] +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_sa_template_yaml 
b/lm_eval/tasks/evalita_llm/_sa_template_yaml new file mode 100644 index 00000000..49ae1c80 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_sa_template_yaml @@ -0,0 +1,9 @@ +dataset_path: evalitahf/sentiment_analysis +output_type: multiple_choice +test_split: test +fewshot_split: train +validation_split: test +doc_to_target: !function utils.sa_doc_to_target +doc_to_choice: !function utils.sa_doc_to_choice +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml b/lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml new file mode 100644 index 00000000..fb067b9d --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_sum_template_fp-small_yaml @@ -0,0 +1,10 @@ +dataset_path: evalitahf/summarization-fp +output_type: generate_until +generation_kwargs: + until: + - "" +test_split: test_100 +fewshot_split: dev +doc_to_target: "{{target}}" +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_sum_template_fp_yaml b/lm_eval/tasks/evalita_llm/_sum_template_fp_yaml new file mode 100644 index 00000000..33993742 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_sum_template_fp_yaml @@ -0,0 +1,9 @@ +dataset_path: ARTeLab/fanpage +output_type: generate_until +generation_kwargs: + until: + - "" +test_split: test +doc_to_target: "{{target}}" +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/_sum_template_yaml b/lm_eval/tasks/evalita_llm/_sum_template_yaml new file mode 100644 index 00000000..bfe69669 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_sum_template_yaml @@ -0,0 +1,11 @@ +dataset_path: silvia-casola/WITS +output_type: generate_until +generation_kwargs: + until: + - "" +test_split: test_100 +fewshot_split: dev +#test_split: train +doc_to_target: "{{summary}}" +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_te_template_yaml b/lm_eval/tasks/evalita_llm/_te_template_yaml new file mode 100644 index 00000000..ed8888fc --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_te_template_yaml @@ -0,0 +1,13 @@ +dataset_path: 
evalitahf/textual_entailment +output_type: multiple_choice +test_split: test +fewshot_split: dev +validation_split: dev +doc_to_target: "{{ 0 if entailment == 'SI' else 1 }}" +doc_to_choice: ["Sì", "No"] +metric_list: + - metric: acc + aggregation: mean + higher_is_better: true +metadata: + version: 1 diff --git a/lm_eval/tasks/evalita_llm/_wic_template_yaml b/lm_eval/tasks/evalita_llm/_wic_template_yaml new file mode 100644 index 00000000..bb5d0f00 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/_wic_template_yaml @@ -0,0 +1,14 @@ +dataset_path: evalitahf/word_in_context +dataset_name: default +output_type: multiple_choice +test_split: test +fewshot_split: dev +validation_split: dev +doc_to_target: label # 0: No, 1: Si +doc_to_choice: ["No", "Sì"] +metric_list: + - metric: f1 + higher_is_better: true + aggregation: f1 +metadata: + version: 1.0 diff --git a/lm_eval/tasks/evalita_llm/metrics.py b/lm_eval/tasks/evalita_llm/metrics.py new file mode 100644 index 00000000..2dbc53f3 --- /dev/null +++ b/lm_eval/tasks/evalita_llm/metrics.py @@ -0,0 +1,165 @@ +import torch +from sklearn.metrics import f1_score, precision_score, recall_score + + +inference_decorator = ( + torch.inference_mode if torch.__version__ >= "2.0.0" else torch.no_grad +) + + +def _aggreg_ls(predictions): + """ + Custom aggregation to compute corpus level metrics for the lexical substitution task + predictions is a list of tuples (prec, has_answ, has_annotation) + prec is the precision before dividing by |A| + has_answ is 0 if the model did not produce any answer + has_annotation is 0 if the gold answer is empty: no synonims from annotators + """ + # get |A| and |T| to compute the final precision and recall using a lambda function + A = sum([p[1] for p in predictions]) + T = sum([p[2] for p in predictions]) + # compute the final precision and recall + if A == 0: + prec = sum([p[0] for p in predictions]) / 1 + else: + prec = sum([p[0] for p in predictions]) / A + if T == 0: + rec = sum([p[0] for p in 
predictions]) / 1 + else: + rec = sum([p[0] for p in predictions]) / T + # compute the final F1 score + f1 = 0 + if prec + rec != 0: + f1 = (2 * prec * rec) / (prec + rec) + return f1 + + +def _aggreg_sa_v2(predictions): + """ + This aggregation considers the sentiment analysis task as a multiple choice one with four classes + the f1 score is computed as the average of the f1 scores for each class weighted by the number of samples + See sklearn.metrics.f1_score for more details + + """ + predictions, references = zip(*predictions) + f1 = f1_score(references, predictions, average="weighted") + return f1 + + +def _aggreg_sa(predictions): + """ + Custom aggregation function for the sentiment analysis task + The original tasks compute the F1 score for each class and then average them + Since the prompt cast the task to a multple choice one we need to aggregate the results in a different way + """ + # split the predictions and references in two lists (pred is a tuple) + predictions, references = zip(*predictions) + """ + Class 0: positivo -> 'opos': 1, 'oneg': 0 + Class 1: negativo -> 'opos': 0, 'oneg': 1 + etc. 
+ """ + + def _map_to_original_labels(x): + """ + Return two separate list of labels for opos and oneg + x is a list of integers + """ + opos = [] + oneg = [] + for i in x: + if i == 0: + # positive + opos.append(1) + oneg.append(0) + elif i == 1: + # negative + opos.append(0) + oneg.append(1) + elif i == 2: + # neutral + opos.append(0) + oneg.append(0) + elif i == 3: + # mixed + opos.append(1) + oneg.append(1) + else: + pass + return opos, oneg + + pred_opos, pred_oneg = _map_to_original_labels(predictions) + ref_opos, ref_oneg = _map_to_original_labels(references) + + opos_f1 = f1_score(ref_opos, pred_opos, average=None) + opos_f1_c0 = f1_score(ref_opos, pred_opos, average=None)[0] + if len(opos_f1) > 1: + opos_f1_c1 = opos_f1[1] + else: + opos_f1_c1 = 0 + + # oneg class + oneg_prec_c0, oneg_prec_c1 = precision_score( + ref_oneg, pred_oneg, labels=[0, 1], average=None + ) + oneg_rec_c0, oneg_rec_c1 = recall_score( + ref_oneg, pred_oneg, labels=[0, 1], average=None + ) + oneg_f1 = f1_score(ref_oneg, pred_oneg, average=None) + oneg_f1_c0 = f1_score(ref_oneg, pred_oneg, average=None)[0] + if len(oneg_f1) > 1: + oneg_f1_c1 = f1_score(ref_oneg, pred_oneg, average=None)[1] + else: + oneg_f1_c1 = 0 + + # average f1 score for each class (opos and oneg) + f1_score_opos = (opos_f1_c0 + opos_f1_c1) / 2 + f1_score_oneg = (oneg_f1_c0 + oneg_f1_c1) / 2 + # average f1 score for the two classes + f1_final = (f1_score_opos + f1_score_oneg) / 2 + + return f1_final + + +def _aggreg_ner(predictions): + pred, ref = zip(*predictions) + # concat all the predictions and references + all_pred = [] + for p in pred: + all_pred.extend(p) + all_ref = [] + for r in ref: + all_ref.extend(r) + # compute the F1 score + f1 = f1_score(all_ref, all_pred, average=None) + if len(f1) > 1: + f1_sum = sum(f1[:-1]) / (len(f1) - 1) + else: + f1_sum = f1[0] + + return f1_sum + + +def _aggreg_rel(predictions): + pred, ref = zip(*predictions) + # concat all the predictions and references + all_pred = [] + 
for p in pred: + all_pred.extend(p) + all_ref = [] + for r in ref: + all_ref.extend(r) + # compute the F1 score + f1 = f1_score(all_ref, all_pred, average="macro") + return f1 + + +# ------------------------ DOCUMENT DATING --------------------------- + + +def _aggreg_dd(items): + unzipped_list = list(zip(*items)) + golds = unzipped_list[0] + preds = unzipped_list[1] + fscore = f1_score(golds, preds, average="macro") + return fscore diff --git a/lm_eval/tasks/evalita_llm/single_prompt.zip b/lm_eval/tasks/evalita_llm/single_prompt.zip new file mode 100644 index 0000000000000000000000000000000000000000..3a8534bc54055cc7908fe4a96e290b7e8c28d2ba GIT binary patch literal 13335 zcmcIq2RPMl`?ohq_TG-!v9kA`Nj7Egb&QNC^AL%Qh-{}2*+fQ0NHUUfWJD>-tnAhQ ze0yK7qv4nLf4xta4%hd(zW2F5_vg8v=l5yKqo&P$n!`j1S(Ggm}#L z%e%~NeJouZy)AjYAm(0IT+O{Ly`0VMAbkFot}Z|9w7wm-d)=CC@AW3z{wF)OI(2Ss zV1kJ?vSJAiSwq50+qhjuvKF=`o}jy>F{kFi7d!!A7Oz|J{D9xo0fET8Bp{u!ZEQ4vYO=a4F^Un1u|_n^J{P zdvtIEseA!dczvcN3`0Ig*8q*}!epC=GdUl{w29=U($X!+h$-dE_oeV@2*bpJV9PVk zFZM4{-B!y-vM!iDGl3`G;ARze9Q$thaCBp$sDpJ!CUnuQ@^L6pVSVU^{&3BWkccL+Q?}$U0Nb&aMYK-~sx$9_P3^9nsPeX?BRpPrk-wVFyEr+-YFi##D81}~&YVZ9l?ofJYrIW%?B#7XjDY~9C%x6o zmKR$K%#_Vd@v0rZKMQ>%5WxF2M>#{}Zh^F=eYE5kbK?x(4;RHLHQ>d*{(Hj3uGXS8 z?bSXHwY7Km)su=Oq;Eq9NhHOs$Kzem>#BpTrxdS6y1era<;&_-o{|H{ln)hy*PbMc z%8L7U?XGw=?hs~4LgyA{F(r55*AweK{Cx^se0_C2a^$*abDo6KDJ>UAu;i~R6su5( zC%v4P?$u%oFj%=MiPeG@{*W$_&kJJ(WI5^b@v)cAqnL+^mYDkAxAXINBW7p0r@gWS z_)X2;ZN+w>(%cwaXqS``XMY@AfW|;cV9RA6)tl{a^nDvG`K6HPRQVg@7t&FU+;VVD z2OU)$YNkrxTZuPy{KGsuT(qe0H;gEgDk=Q2KyI+F+nyJ@BE~jSqE6R#*fnm%TpY0N z74&~dpgTuUrh~4uHEYzs@ojq^y61PPrIF0T+oD~SUvYXaf=o0Ltd!MfcW?3P#U48^ z(8HMgzzy7eRDepKWz4YPh+P5@=8#I+c8A%m1M!soSSne1|42xOT%s$BKobS&Q1m;_h2_l zBagFTO1!ncmM&ege8svV7wM>Dy&?UJ8CkPiZ?o@8esT(CS>Ybkh`1<9n=P3vD~F?P3V9NC75}q@ zLrr#F*f)WC7Oe#riwhS~7N!lK2YK?1pct+jswcNM%>;m``h^Cf`*#%SQ$+5LNA4v= z@N)585K*s4h-3g_Q0)?Xg&2V{fb?MukWYnsh$rA(8h z1`|PeKxXUcV1TYkw2*&zy#R@)=-Hd^pk(K^Bqr)3ZA(>ig&2E81mPeo{M86rG8N;D 
z!FH{ps4g$5L?|DKmpf0FC6DL%g0zQa9;oztxUWWj2@@$kA&=2J3a6L2?3&zI0<#Ml zABXpI$mt22SM+dl*l&;TQT0A!B;{gb(;?}>7IjU5b}M=EW|oZ1dN*wG5`5e@T!b`8 zO}rCqNlN_|po!`e_<6SP;Ug>FQQ(BQQ{59JGBmTZpwH4h9cO7e&I4n++IeBH=8`fT z6Q6BF_LLUdUF*ArwV)-aUZGLHZXpXnj?Nr|jVWp378ZcH4T^fwIwE}n$o?L4Ah}8) zFhA)D@hABI_vb`X&;}QwVv5vF9n@C{r?7E$%4^^LdQKn{Ya8L@@tfP*_NUE#44p2; z);pog5h(3!;Z0x1&)0j;K7(!s6Ce<&wBiwnvsI>XdAVP2zsSef-#A-vn@OOHA%Xfq zli!(qHrN&N2NybK*et^(JUt`!cb3PO7ua^Q=QPf-$Q&G@r3huh%Nth>-7=% zE08bgXgMQ5Zk#oAXbh*-or`U9oo}>W5CuEsKCs%yNhUE+|Gu-p3Wm2|A9x+mSMkmw z*v`8c{=xxsao5aaZhSB?M3;N(RgNzE0%2nj>4%-C1*_4Xb%G&QvLMh-U=?NmMEHHp z)$&T}%K12Ic!n|&twQX><&A*=Nhi?+-LJIZ)1tfILNe2f-m{0$lWnZfD{a1vWh#%k zAQaFjMC>pS9y?V%g?eWe$DMEn_wq_PYG3Xb?!aPkq52nz>`TSn;URe0&}!=`&*yB- zcOtpj?iq?Del(=s)I7sWN~B;CY6V{LPL(&DvSSRW|2lRmg_Q^YZgB=@SFX6;Qvh=- zlC<$X1m*y;zsFnDOz!xVvp)sJ;E|wJ(jN zu2B+Ro&c)>pOtTmTqwER3xEDV2Q+7qi%BlRGK*p{&s*#gw@M&F&q2DCjWP9F(=TD` zR3Z}(Mm;>_9ukp9^D_6WH(pySTv=zl?-WN^vB{!Kz|b#p)76^voQks#VRT+jdbE{Y z(QJ{Umf9Q15U&>yEZ&(i#kVhvb*7`v?=j#Rm3PBxEO(Wj27SspyR*F4y|nfD-BWUP z6`$FMKSEg+AT4Q#2qO@%!%$v8szBc$0?qjA_W5xsuyuvFSR%6ce`-Z1M8R_K^HbrX zf4&VP{a|#<+Z(CBG@*E#-k6$nO;~@v5MQ#6Q1vim45fI<`uPHg$uCs1EhH4Y zaK>&JDw7y zC!I<;ec)p#d!L{W=gfn-(=N36id1qh0%jX!!EtS-TcJ3{pjpoqV!u9ew#gRmSx&4@ zTWQ()kL1i%XUiLnub~4O1UB@N0JtInfc_q8fRulEEp>$a>0b03$-*^GN?VbiAbcmi z)#bZEu9jTe>1@Sl&CD(nQJH8ujN{F!Y#;Y*t#2{W~xiJ}% zJS`v*xjeOVJs^@M@J+$Nji6SM?p#Y|y;Emw7ItrSRHJAVIk}bc8)bCM^t6Sci%R;) zQ({)}1wb<}k_NJ^FqECvaQa53rvrz@ufn=N+?U+jmZCMA7fx`RgDJR7HU-eImDINx zt!{QgcozkXXDP#nc6lmVv{w4(RlQ}{g3siiS!<@g+!gZ#dp?Z3HA(#bT6)$6o+WGm z)x!7Y-f#q}0CvAe^}oA#{u$u^{l$~u$Hj9)s_Wq5DcsT|_3OnGS)edP)SJg&JOP2i z&e8htv_Dp%&~h6C6GJz|+Zkni@Zfy?_iJvQ#sDp}uJM7wQN@z6?yH8v7T?G%CDhMY z(S7E)5mo}DYV!ZKsYZQ9S^ZAuU{ZfY8g}$2Zn}O*YqWe0Hncri9bV3r0D3s5q$AJR zXb3AB=O!6uBgp$U$E}f*FfCJVQS|b1ci`K+VH!=nj51**gI(X^&&eMs#9ED7o$5ZJ znY9fJM_6ApbIyM_^XyA$c-_@0bYoS*mR_p&u`bTfRRl$`vmtmpIbo(i%b3vsyS`mFz8fKNo=JSi7VEss@Z0<6_?&louYO~sqj0JUT&l;1>#X)V ziF!Iox4)_D&$YSinILpSu;@bKY2kAtrF 
z`uh3Tkx0jJvQ)OSWd>*x!DV4r&wkt*;IE@nB-?DZ09_Q*@W$%d+kR|VW}|Ew*Xd-> zxx>0%f?+9`E62p-5dLyfd7L`kC4a|e>|D0 zI|mK=%S|w%Kj8ypN|Vn~fwsqT$xe`}kzQG4E?~Sp$vtpoi46B1UdJn?)$02?-78r} zW%_odw%6w_%o4IcJP%K0=H8bu*y!G-XrtsdDy_K|>})jJ0dcwMyr`FvF1mjQZ0H-v zh+oL&=iO1uGKTf_6>V(DnKcd@MC$S0`s9K^kA+2L+JkpdW`qP=mE&w7!}XGyr_+h~ z#2I^%k&z7d+;wSEsuk=j8xv#k2~DI=O|ZFeUbPpd6{}{jl6umgV{>`U!_B9{9{1m5 zdc*&T`3ISL9yIXERWs{HwUYSkEoNnYrOV7J4ez`5;<<}Q)T{(MwhNw5zv@xJ`l&2(u%f??l>43kxHf{nQ*PGUcxQU9IXd3zM(Zm=?y_*O z(pme;HM{D`9=i_}a9hdcr_C}z_PxeemxVT?(PU zx0*ZfP>DXu{m4!grlzq6J{3n~;l~xOBUs>F*Q|(}WOd&Kah6p?z8{uM@8hbD@vf7c zZPQw|gls&uifnWBtn9h*3yd0$LdfGWYUQdDPQP-Po-~6=BrmMo*PrkFj zdwuYYr}-e6DlmDotV&;XSB`GPOnX)N&P18RLA2`n=jZZR+K!Y`Ty=s( zlb{!!853kBczl&0^6=2Fx8sDdR_!hMFS%rN(5@Gf8*>TQ7j__-=UXr72j4=T13f+ztowZeOvhycXS&exx-Ztw0^VQRvfdF+D8`tuaeM z8Lho{xoJc)ma~TS9ZA=W{{Grq{}(PsK9B2MAFx*2J752COEw4_P9j7=48Z*R02Cs^ z148+^XNBDK0M1`~PxmW-NAgQI`H5O(2{3j7pfb0a9|f4;DY{dEJD6GH7cpAq>sUxI zd(`U%i==r4LyIMiJShZky@R>(zJ=k5UdLMSjZC2RtrP(HYY(`V;S`)!eejjI`I{b?KTQo&m-0EMMQ~4?;MHjZ;E8JnxHe2XN{4&W8SD zrR(ssgnbO#ZY8uhu^=jf$28I1S-Q8u)bj7LZhUDZ3+G~$27&+ z`Rfe#^RZXYpO8fYuu;4LD{6dZT$+Q%EPpRw(7^2b)0*`%r%A8*2pa=*}|+_3Aq z7ZmdyMX!YQ$~~;eiS*@rsgG0{atp3#m$eJ`Pp{0W1b39L_@&Ntj$YD#fgjLlf@~wI}0wYa~6HEe(}FT@8P_)H+9hR^7EGcr!y^ z)+~2a*xy}`ClHbv0e6mpwGNTcTQ#0l>sfQcXH0MZ0Qb!`eKFNGqz1{IF3wDSz|dP6 z5&~^VE`6GrUNKx&i@N!P$j_22se#%SaQYAq2Lzc=F;sJMA}3U**`if z^IpZ}YxDuZ{65r_Jr~aKcM>J*o>PB&P5mn6Lc#Z)HL6huf+=oW{?AvU5`A=aFJlv} zFj4nuu)p6_@LGyloi~?q8zsukhoL5pWbo$~GQdSs2L}j+m5P>jYJ%GYjiEN6IBp7S z%Az5bv^m{wM!#8u47UtArXj0VsIL_LHVeo!ag^}u<8684d0h7~lTFa$G20&hpv~)@ z^TWD%kU6{c3DU{}DAC*(95g=rO#)2;?eScPT^m6+)M10k)0gUA%>@Zvbh>i!m8{#9 zImNxH;BTJ_cQAnjL45TQXa<2PfZidl6lRA#t`-CV>3^l!Vb}fe7&?FNkG-uc@&D;< z$tL@(Ol2qby}D#!W5UkWRigS~dG$^L5#bD9#O~^s_&86;o-rJ6<-X`fmyF!NTXfAi z-}9*DM1ytoh(4fGzIq#O8!0&Zo)1qcXa{Bff}*MgJ!7`ke8Br0J%Xf?v{wq{vg<>I z(|j)Ye#Wl{y%rX!owXErb+a*V7P=_r560n zAa9l!{DrOv`z57__i7N~W8sCz`fpeTuP<-JCB+n-8jTpC&V+oW#TBw7je9zJS$0qt 
zPZX29kOj0zaJxW0DwABNCZk_^T-=*4p4;?Xh)}t3rc$R$N=V+u0GJqr?s5B{)6mv@7cmR5di_9|9wz^IvCJ||E2-+ zYkdCqRqr^5hZLcr%+?)e+}t1O?CUPN)~{7B^8R&1lrT)k@yy)Q_Ry7M&LG!#4YzT` z*4>cCjPESA(`D5tr6EO)CU>Y5F}jz@#HA6T8Zdx`>KU_+)u`g0keF0KW=-qOA-$h9 zlaECPt@?c76q~o%GILZ@-f22+gLu5^+Jwngl7V`i@s!&%J_Gsm>(w;WTrHvMh4XY= z`Z8r+!@O_bRvB`Nka2bACJ}dG(C)_43t44kwu_B#H(HC4&T18K6WZ-sNTgqzikF-V z$tLWDvg}B~NU9d@ak_I5Xk*GtKEvm8A9As!45JY?=Vmxi)@!cjk;S?(_O8S zvtIAmIow{7kQvo54q8_! zdHLqdU;}d_G_CEeOlfvc2U{)XN4wAq=@j7Kcg;mrvn~(*6 zY5w!%iks*b9L<>W&KZ;88}dtMQXDYmcY?xb-c`QW>u{1&f)laZEVDYdMjFAjKscv! z2F5|Sa?kM}1h4pw)96p9UkyUl4iO<*6f>LUTfMl5>&{)Iw`SrB73R?8y^aCzVR92%G7zW8lv<4x@-& z1~EOj17oZ`+)q*Gb{hZKlvgZ3}p-94^5nC|!f0Jse+>@SYuW@8sMR zei2qSv_2c<)@dx!0N)42*IsdmMOL8o;y)r6PwDDn(A5)&EU(Ft#Bv^jdKzc^i^{LF zY89~&-~s^aXrQ8zqW^h94iTXrYf%pPI689m&+~HsdiTM^9P*48P_Q2?Scv8???$d` zkR~PmD%BqNbYK^G_zIXHJ6N!gDg*+5b66ce!uOkl3;8Pmx)<`$4lw0)uwWsHh|}~a ztjMmPVCDpJxg$@=hWD55< zn+MSUS33*2Wer^X4i+p#0smN|CmRj;*;nND>QQ_j6C7vuB;9djn?U0hxs3@lZVwhL zB$VhEn@6hMziHzlTLTz`+{rr{&Dc}FSUW6w%um!P{n;pTqwDDI-6B5D=mCYy|J>2~ zP0R3S>&P9bqr3M!$w_zbpTEdI>|P*OBX=*3vPOAVJAl0#UPoOXV>?^Y32#BZ$3vmUYKF;RJ zB-Nj-BCBObJpB0Xp2lf`ypU{bKV-p888w4%r|;BV?`SsFP0g zzZgVHRVUHCkZl4g5V8~js6Yn`7ShUajLk#39-|5UYztW list: + """ + Convert the gold entities to the target format according to the NER_MAPPING + """ + res = [NER_MAPPING[e["type"]] for e in x] + return res + + +def _ner_gold_to_target_v2(x: list) -> list: + """ + Convert the gold entities to the target format according to the NER_MAPPING + """ + res = [NER_MAPPING[e["type"]] for e in x] + return res + + +def ner_doc_to_target(doc): + ents = doc["entities"] + targ_str = "" + # Entità$Tipo%Entità$Tipo. 
+ if ents == []: + return NO_ENT_STRING + else: + for e in ents: + targ_str += ( + e["entity_text"] + NER_TYPE_SEPARATOR + e["type"] + NER_ENTITY_SEPARATOR + ) + return targ_str[:-1] + + +def ner_process_results(doc, results): + """ + Process the results of the Named Entity Recognition task + """ + # each document has a list of entities with the following format: + # [{"entity_text": "string", "type": "string"}] + gold = doc["entities"] + raw_results = results[0] + results = _ner_process_raw_output(raw_results) + + gold_labels = _ner_gold_to_target(gold) + res_labels = [0] * len(gold_labels) + matched_gold_idx = [] + + if len(results) > len(gold): + for r in results: + r_text = r[0] + r_type = r[1] + for i in range(len(gold)): + if r_text == gold[i]["entity_text"] and r_type == gold[i]["type"]: + res_labels[i] = NER_MAPPING[r_type] + matched_gold_idx.append(i) + # Since we have more results than gold, we artificially set to false positive the remaining labels + # extend gold label list + for i in range(len(results) - len(gold)): + gold_labels.append(3) + res_labels.append(2) + elif len(results) == 0 and len(gold) == 0: + res_labels = [3] + gold_labels = res_labels + else: # len(results) <= len(gold) + for r in results: + r_text = r[0] + r_type = r[1] + for i in range(len(gold)): + if r_text == gold[i]["entity_text"] and r_type == gold[i]["type"]: + res_labels[i] = NER_MAPPING[r_type] + matched_gold_idx.append(i) + # we map all wrong predictions to the "O" class + for i in range(len(gold_labels)): + if i in matched_gold_idx: + continue + if gold_labels[i] == 1: + res_labels[i] = 3 + elif gold_labels[i] == 0: + res_labels[i] = 3 + else: + res_labels[i] = 3 + + assert len(gold_labels) == len(res_labels) + return {"f1": (res_labels, gold_labels)} + + +def ner_process_results_v2(doc, results): + """ + Process the results of the Named Entity Recognition task + This version considers and score explicitly when the model responds that there are no entities + """ + # each 
document has a list of entities with the following format: + # [{"entity_text": "string", "type": "string"}] + gold = doc["entities"] + raw_results = results[0] + results = _ner_process_raw_output_v2(raw_results) + + # eval_logger.debug(f"results {results}") + # eval_logger.debug(f"gold {gold}") + + gold_labels = _ner_gold_to_target_v2(gold) + res_labels = [0] * len(gold_labels) + matched_gold_idx = [] + + if len(results) > len(gold): + for r in results: + # print(r) + r_text = r[0] + r_type = r[1] + for i in range(len(gold)): + if r_text == gold[i]["entity_text"] and r_type == gold[i]["type"]: + res_labels[i] = NER_MAPPING[r_type] + matched_gold_idx.append(i) + # Since we have more results than gold, we artificially set to false positive the remaining labels + # extend gold label list + for i in range(len(results) - len(gold)): + # gold_labels.append(3) + # res_labels.append(2) + gold_labels.append(4) + res_labels.append(3) + elif len(results) == 0 and len(gold) == 0: + # res_labels = [random.choice([0, 1, 2, 3])] + res_labels = [3] + gold_labels = res_labels + elif len(results) == 1 and results[0] == NO_ENT_STRING: + # res_labels = [3] + res_labels = [4] + gold_labels = res_labels + else: # len(results) <= len(gold) + for r in results: + r_text = r[0] + r_type = r[1] + for i in range(len(gold)): + if r_text == gold[i]["entity_text"] and r_type == gold[i]["type"]: + res_labels[i] = NER_MAPPING[r_type] + matched_gold_idx.append(i) + # we map all wrong predictions to the "O" class + for i in range(len(gold_labels)): + if i in matched_gold_idx: + continue + if gold_labels[i] == 1: + # res_labels[i] = 2 + res_labels[i] = 4 + elif gold_labels[i] == 0: + # res_labels[i] = 1 + res_labels[i] = 4 + else: + res_labels[i] = 4 + + assert len(gold_labels) == len(res_labels) + return {"f1": (res_labels, gold_labels)} + + +def _ner_process_raw_output(llm_result: str) -> list[tuple]: + if NO_ENT_STRING in llm_result: + return [] + if llm_result == "": + return ["WRONG"] + 
tmp_results = llm_result.split(NER_ENTITY_SEPARATOR) + results = [] + for res in tmp_results: + r = res.strip() + # split on type separator + r_text = "" + r_type = "" + r_splitted = r.split(NER_TYPE_SEPARATOR) + if len(r_splitted) < 2: + r_text = r_splitted[0] + r_type = "" + else: + r_text = r_splitted[0] + r_type = r_splitted[1] + if r_text != "": + results.append((r_text, r_type.upper())) + return results + + +def _ner_process_raw_output_v2(llm_result: str) -> list[tuple]: + if NO_ENT_STRING in llm_result: + return [NO_ENT_STRING] + if llm_result == "": + return ["WRONG"] + tmp_results = llm_result.split(NER_ENTITY_SEPARATOR) + results = [] + for res in tmp_results: + r = res.strip() + # split on type separator + r_text = "" + r_type = "" + r_splitted = r.split(NER_TYPE_SEPARATOR) + if len(r_splitted) < 2: + r_text = r_splitted[0] + r_type = "" + else: + r_text = r_splitted[0] + r_type = r_splitted[1] + if r_text != "": + results.append((r_text, r_type.upper())) + return results + + +# ---------------------- RELATION EXTRACTION ---------------------- + + +def _rel_process_raw_output(llm_result: str) -> list[str]: + if NO_REL_STRING in llm_result: + return [] + if llm_result == "": + return ["WRONG"] + tmp_results = llm_result.split(INTER_REL_SEPARATOR) + relations = [] + for res in tmp_results: + r_text1 = "" + r_text2 = "" + r_splitted = res.split(INTRA_REL_SEPARATOR) + if len(r_splitted) < 2: + r_text1 = r_splitted[0].strip() + r_text2 = "" + else: + r_text1 = r_splitted[0].strip() + r_text2 = r_splitted[1].strip() + relations.append((r_text1, r_text2)) + assert len(relations) == len(tmp_results) + return relations + + +INTER_REL_SEPARATOR = "%" +INTRA_REL_SEPARATOR = "$" +NO_REL_STRING = "&&NOREL&&" + + +def re_doc_to_target(doc): + ents = doc["relations"] + targ_str = "" + # Entità$Tipo%Entità$Tipo. 
+ if ents == []: + return NO_ENT_STRING + else: + for e in ents: + targ_str += e[0] + INTRA_REL_SEPARATOR + e[1] + INTER_REL_SEPARATOR + return targ_str[:-1] + + +def _rel_gold_to_target(x: list) -> list: + if x == []: + return [0] + else: + return [1] * len(x) + + +def rel_doc_to_target(doc): + rel = doc["relations"] + targ_str = "" + # misura1$result1%misure2$result2. + if rel == []: + return NO_REL_STRING + else: + for r in rel: + targ_str += r[0] + "$" + r[1] + "%" + return targ_str[:-1] + + +def _extract_relations(results): + relations = [] + for r in results: + r_text1 = "" + r_text2 = "" + r_splitted = r.split(INTRA_REL_SEPARATOR) + if len(r_splitted) < 2: + r_text1 = r_splitted[0] + r_text2 = "" + else: + r_text1 = r_splitted[0] + r_text2 = r_splitted[1] + relations.append((r_text1, r_text2)) + assert len(relations) == len(results) + return relations + + +def rel_process_results_v3(doc, results): + """ + Process the results of the Relation extraction task not considering the order of the relation extracted + """ + # each document has a list of relation with the following format: + # [[text1, text2], [text3, text4]] + gold = doc["relations"] + raw_results = results[0] + has_results = 0 if NO_REL_STRING in raw_results else 1 + has_gold = 1 if gold != [] else 0 + + res_labels = [] + gold_labels = [] + + if has_results == 0 and has_gold: + # False negative + gold_labels = _rel_gold_to_target(gold) + res_labels = [0] * len(gold_labels) + elif has_results == 0 and has_gold == 0: + # True negative + gold_labels = _rel_gold_to_target(gold) + res_labels = gold_labels + elif has_results and has_gold == 0: + # False positive + gold_labels = _rel_gold_to_target(gold) + res_labels = [1] * len(gold_labels) + else: + results = _rel_process_raw_output(raw_results) + # results = raw_results.split(INTER_REL_SEPARATOR) + gold_labels = _rel_gold_to_target(gold) + res_labels = [0] * len(gold_labels) + assert len(gold) > 0 + for i in range(len(gold)): + for j in 
range(len(results)): + r_text1 = results[j][0] + r_text2 = results[j][1] + + if r_text1 == gold[i][0] and r_text2 == gold[i][1]: # list of lists + res_labels[i] = 1 + results[j] = ("DELETED", "DELETED") + elif r_text1 == "DELETED" and r_text2 == "DELETED": + continue + else: + pass + # if there are more predictions than gold, we set the remaining predictions to false positive + if len(results) - len(gold) > 0: + for i in range(len(results) - len(gold)): + if results[i] == ("DELETED", "DELETED"): + continue + res_labels.append(1) + gold_labels.append(0) + + assert len(gold_labels) == len(res_labels) + return {"f1": (res_labels, gold_labels)} + + +LS_SPLIT_REGEX = r"[^,]+" + + +def split_text_with_regex(text, pattern): + """ + pattern: str - a regex pattern to match the text + text: str - the text to split + """ + import re + + # Get text with model-generated words for comparison with the gold standard + text = text.split("\n")[0] + + # Find all matches for the pattern + matches = re.findall(pattern, text) + # Split each matched segment further if it contains a comma and is quoted + result = [] + for match in matches: + if match.startswith('"') and match.endswith('"'): + # Remove the quotes and split inside the quoted string + inner_matches = re.findall(r"[^,]+", match[1:-1]) + result.extend(inner_matches) + else: + result.append(match) + + # Strip leading and trailing whitespaces from each element + result = [element.strip().replace('"', "") for element in result] + + return result + + +# ---------------------- SUMMARIZATION ---------------------- + + +def rouge1_score(references, predictions, **kwargs): + """ + suboptimal way of compute rouge because of the following issue: + https://github.com/EleutherAI/lm-evaluation-harness/issues/1302 + """ + rouge = load("rouge") + return rouge.compute(predictions=predictions, references=references, **kwargs)[ + "rouge1" + ] + + +def process_results_sum(doc, results): + """ + Process the results of the Evalita summarization 
task + """ + ref = doc["summary"] if "summary" in doc.keys() else doc["target"] + rouge_scorer = load("rouge", keep_in_memory=True) + r1score = rouge_scorer.compute(predictions=results, references=[ref])["rouge1"] + + return { + "rouge1": r1score, + } + + +def faq_doc_to_target(x): + if x["correct_answer"] == "A": + return 0 + elif x["correct_answer"] == "B": + return 1 + elif x["correct_answer"] == "C": + return 2 + elif x["correct_answer"] == "D": + return 3 + else: + eval_logger.warning( + 'WARNING: correct answer not found or not in ["A", "B", "C", "D"]' + ) + + +def ht_doc_to_target(x): + if x["source"] == "ilgiornale": + return 0 + elif x["source"] == "repubblica": + return 1 + else: + eval_logger.warning( + 'WARNING: source not found or not in ["ilgiornale", "repubblica"]' + ) -- GitLab From 684fd2ddc249930a071ac9920221048070ea8d99 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Tue, 11 Feb 2025 13:11:48 +0000 Subject: [PATCH 24/32] Delete lm_eval/tasks/evalita_llm/single_prompt.zip (#2687) --- lm_eval/tasks/evalita_llm/single_prompt.zip | Bin 13335 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 lm_eval/tasks/evalita_llm/single_prompt.zip diff --git a/lm_eval/tasks/evalita_llm/single_prompt.zip b/lm_eval/tasks/evalita_llm/single_prompt.zip deleted file mode 100644 index 3a8534bc54055cc7908fe4a96e290b7e8c28d2ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13335 zcmcIq2RPMl`?ohq_TG-!v9kA`Nj7Egb&QNC^AL%Qh-{}2*+fQ0NHUUfWJD>-tnAhQ ze0yK7qv4nLf4xta4%hd(zW2F5_vg8v=l5yKqo&P$n!`j1S(Ggm}#L z%e%~NeJouZy)AjYAm(0IT+O{Ly`0VMAbkFot}Z|9w7wm-d)=CC@AW3z{wF)OI(2Ss zV1kJ?vSJAiSwq50+qhjuvKF=`o}jy>F{kFi7d!!A7Oz|J{D9xo0fET8Bp{u!ZEQ4vYO=a4F^Un1u|_n^J{P zdvtIEseA!dczvcN3`0Ig*8q*}!epC=GdUl{w29=U($X!+h$-dE_oeV@2*bpJV9PVk 
zFZM4{-B!y-vM!iDGl3`G;ARze9Q$thaCBp$sDpJ!CUnuQ@^L6pVSVU^{&3BWkccL+Q?}$U0Nb&aMYK-~sx$9_P3^9nsPeX?BRpPrk-wVFyEr+-YFi##D81}~&YVZ9l?ofJYrIW%?B#7XjDY~9C%x6o zmKR$K%#_Vd@v0rZKMQ>%5WxF2M>#{}Zh^F=eYE5kbK?x(4;RHLHQ>d*{(Hj3uGXS8 z?bSXHwY7Km)su=Oq;Eq9NhHOs$Kzem>#BpTrxdS6y1era<;&_-o{|H{ln)hy*PbMc z%8L7U?XGw=?hs~4LgyA{F(r55*AweK{Cx^se0_C2a^$*abDo6KDJ>UAu;i~R6su5( zC%v4P?$u%oFj%=MiPeG@{*W$_&kJJ(WI5^b@v)cAqnL+^mYDkAxAXINBW7p0r@gWS z_)X2;ZN+w>(%cwaXqS``XMY@AfW|;cV9RA6)tl{a^nDvG`K6HPRQVg@7t&FU+;VVD z2OU)$YNkrxTZuPy{KGsuT(qe0H;gEgDk=Q2KyI+F+nyJ@BE~jSqE6R#*fnm%TpY0N z74&~dpgTuUrh~4uHEYzs@ojq^y61PPrIF0T+oD~SUvYXaf=o0Ltd!MfcW?3P#U48^ z(8HMgzzy7eRDepKWz4YPh+P5@=8#I+c8A%m1M!soSSne1|42xOT%s$BKobS&Q1m;_h2_l zBagFTO1!ncmM&ege8svV7wM>Dy&?UJ8CkPiZ?o@8esT(CS>Ybkh`1<9n=P3vD~F?P3V9NC75}q@ zLrr#F*f)WC7Oe#riwhS~7N!lK2YK?1pct+jswcNM%>;m``h^Cf`*#%SQ$+5LNA4v= z@N)585K*s4h-3g_Q0)?Xg&2V{fb?MukWYnsh$rA(8h z1`|PeKxXUcV1TYkw2*&zy#R@)=-Hd^pk(K^Bqr)3ZA(>ig&2E81mPeo{M86rG8N;D z!FH{ps4g$5L?|DKmpf0FC6DL%g0zQa9;oztxUWWj2@@$kA&=2J3a6L2?3&zI0<#Ml zABXpI$mt22SM+dl*l&;TQT0A!B;{gb(;?}>7IjU5b}M=EW|oZ1dN*wG5`5e@T!b`8 zO}rCqNlN_|po!`e_<6SP;Ug>FQQ(BQQ{59JGBmTZpwH4h9cO7e&I4n++IeBH=8`fT z6Q6BF_LLUdUF*ArwV)-aUZGLHZXpXnj?Nr|jVWp378ZcH4T^fwIwE}n$o?L4Ah}8) zFhA)D@hABI_vb`X&;}QwVv5vF9n@C{r?7E$%4^^LdQKn{Ya8L@@tfP*_NUE#44p2; z);pog5h(3!;Z0x1&)0j;K7(!s6Ce<&wBiwnvsI>XdAVP2zsSef-#A-vn@OOHA%Xfq zli!(qHrN&N2NybK*et^(JUt`!cb3PO7ua^Q=QPf-$Q&G@r3huh%Nth>-7=% zE08bgXgMQ5Zk#oAXbh*-or`U9oo}>W5CuEsKCs%yNhUE+|Gu-p3Wm2|A9x+mSMkmw z*v`8c{=xxsao5aaZhSB?M3;N(RgNzE0%2nj>4%-C1*_4Xb%G&QvLMh-U=?NmMEHHp z)$&T}%K12Ic!n|&twQX><&A*=Nhi?+-LJIZ)1tfILNe2f-m{0$lWnZfD{a1vWh#%k zAQaFjMC>pS9y?V%g?eWe$DMEn_wq_PYG3Xb?!aPkq52nz>`TSn;URe0&}!=`&*yB- zcOtpj?iq?Del(=s)I7sWN~B;CY6V{LPL(&DvSSRW|2lRmg_Q^YZgB=@SFX6;Qvh=- zlC<$X1m*y;zsFnDOz!xVvp)sJ;E|wJ(jN zu2B+Ro&c)>pOtTmTqwER3xEDV2Q+7qi%BlRGK*p{&s*#gw@M&F&q2DCjWP9F(=TD` zR3Z}(Mm;>_9ukp9^D_6WH(pySTv=zl?-WN^vB{!Kz|b#p)76^voQks#VRT+jdbE{Y z(QJ{Umf9Q15U&>yEZ&(i#kVhvb*7`v?=j#Rm3PBxEO(Wj27SspyR*F4y|nfD-BWUP 
z6`$FMKSEg+AT4Q#2qO@%!%$v8szBc$0?qjA_W5xsuyuvFSR%6ce`-Z1M8R_K^HbrX zf4&VP{a|#<+Z(CBG@*E#-k6$nO;~@v5MQ#6Q1vim45fI<`uPHg$uCs1EhH4Y zaK>&JDw7y zC!I<;ec)p#d!L{W=gfn-(=N36id1qh0%jX!!EtS-TcJ3{pjpoqV!u9ew#gRmSx&4@ zTWQ()kL1i%XUiLnub~4O1UB@N0JtInfc_q8fRulEEp>$a>0b03$-*^GN?VbiAbcmi z)#bZEu9jTe>1@Sl&CD(nQJH8ujN{F!Y#;Y*t#2{W~xiJ}% zJS`v*xjeOVJs^@M@J+$Nji6SM?p#Y|y;Emw7ItrSRHJAVIk}bc8)bCM^t6Sci%R;) zQ({)}1wb<}k_NJ^FqECvaQa53rvrz@ufn=N+?U+jmZCMA7fx`RgDJR7HU-eImDINx zt!{QgcozkXXDP#nc6lmVv{w4(RlQ}{g3siiS!<@g+!gZ#dp?Z3HA(#bT6)$6o+WGm z)x!7Y-f#q}0CvAe^}oA#{u$u^{l$~u$Hj9)s_Wq5DcsT|_3OnGS)edP)SJg&JOP2i z&e8htv_Dp%&~h6C6GJz|+Zkni@Zfy?_iJvQ#sDp}uJM7wQN@z6?yH8v7T?G%CDhMY z(S7E)5mo}DYV!ZKsYZQ9S^ZAuU{ZfY8g}$2Zn}O*YqWe0Hncri9bV3r0D3s5q$AJR zXb3AB=O!6uBgp$U$E}f*FfCJVQS|b1ci`K+VH!=nj51**gI(X^&&eMs#9ED7o$5ZJ znY9fJM_6ApbIyM_^XyA$c-_@0bYoS*mR_p&u`bTfRRl$`vmtmpIbo(i%b3vsyS`mFz8fKNo=JSi7VEss@Z0<6_?&louYO~sqj0JUT&l;1>#X)V ziF!Iox4)_D&$YSinILpSu;@bKY2kAtrF z`uh3Tkx0jJvQ)OSWd>*x!DV4r&wkt*;IE@nB-?DZ09_Q*@W$%d+kR|VW}|Ew*Xd-> zxx>0%f?+9`E62p-5dLyfd7L`kC4a|e>|D0 zI|mK=%S|w%Kj8ypN|Vn~fwsqT$xe`}kzQG4E?~Sp$vtpoi46B1UdJn?)$02?-78r} zW%_odw%6w_%o4IcJP%K0=H8bu*y!G-XrtsdDy_K|>})jJ0dcwMyr`FvF1mjQZ0H-v zh+oL&=iO1uGKTf_6>V(DnKcd@MC$S0`s9K^kA+2L+JkpdW`qP=mE&w7!}XGyr_+h~ z#2I^%k&z7d+;wSEsuk=j8xv#k2~DI=O|ZFeUbPpd6{}{jl6umgV{>`U!_B9{9{1m5 zdc*&T`3ISL9yIXERWs{HwUYSkEoNnYrOV7J4ez`5;<<}Q)T{(MwhNw5zv@xJ`l&2(u%f??l>43kxHf{nQ*PGUcxQU9IXd3zM(Zm=?y_*O z(pme;HM{D`9=i_}a9hdcr_C}z_PxeemxVT?(PU zx0*ZfP>DXu{m4!grlzq6J{3n~;l~xOBUs>F*Q|(}WOd&Kah6p?z8{uM@8hbD@vf7c zZPQw|gls&uifnWBtn9h*3yd0$LdfGWYUQdDPQP-Po-~6=BrmMo*PrkFj zdwuYYr}-e6DlmDotV&;XSB`GPOnX)N&P18RLA2`n=jZZR+K!Y`Ty=s( zlb{!!853kBczl&0^6=2Fx8sDdR_!hMFS%rN(5@Gf8*>TQ7j__-=UXr72j4=T13f+ztowZeOvhycXS&exx-Ztw0^VQRvfdF+D8`tuaeM z8Lho{xoJc)ma~TS9ZA=W{{Grq{}(PsK9B2MAFx*2J752COEw4_P9j7=48Z*R02Cs^ z148+^XNBDK0M1`~PxmW-NAgQI`H5O(2{3j7pfb0a9|f4;DY{dEJD6GH7cpAq>sUxI zd(`U%i==r4LyIMiJShZky@R>(zJ=k5UdLMSjZC2RtrP(HYY(`V;S`)!eejjI`I{b?KTQo&m-0EMMQ~4?;MHjZ;E8JnxHe2XN{4&W8SD 
zrR(ssgnbO#ZY8uhu^=jf$28I1S-Q8u)bj7LZhUDZ3+G~$27&+ z`Rfe#^RZXYpO8fYuu;4LD{6dZT$+Q%EPpRw(7^2b)0*`%r%A8*2pa=*}|+_3Aq z7ZmdyMX!YQ$~~;eiS*@rsgG0{atp3#m$eJ`Pp{0W1b39L_@&Ntj$YD#fgjLlf@~wI}0wYa~6HEe(}FT@8P_)H+9hR^7EGcr!y^ z)+~2a*xy}`ClHbv0e6mpwGNTcTQ#0l>sfQcXH0MZ0Qb!`eKFNGqz1{IF3wDSz|dP6 z5&~^VE`6GrUNKx&i@N!P$j_22se#%SaQYAq2Lzc=F;sJMA}3U**`if z^IpZ}YxDuZ{65r_Jr~aKcM>J*o>PB&P5mn6Lc#Z)HL6huf+=oW{?AvU5`A=aFJlv} zFj4nuu)p6_@LGyloi~?q8zsukhoL5pWbo$~GQdSs2L}j+m5P>jYJ%GYjiEN6IBp7S z%Az5bv^m{wM!#8u47UtArXj0VsIL_LHVeo!ag^}u<8684d0h7~lTFa$G20&hpv~)@ z^TWD%kU6{c3DU{}DAC*(95g=rO#)2;?eScPT^m6+)M10k)0gUA%>@Zvbh>i!m8{#9 zImNxH;BTJ_cQAnjL45TQXa<2PfZidl6lRA#t`-CV>3^l!Vb}fe7&?FNkG-uc@&D;< z$tL@(Ol2qby}D#!W5UkWRigS~dG$^L5#bD9#O~^s_&86;o-rJ6<-X`fmyF!NTXfAi z-}9*DM1ytoh(4fGzIq#O8!0&Zo)1qcXa{Bff}*MgJ!7`ke8Br0J%Xf?v{wq{vg<>I z(|j)Ye#Wl{y%rX!owXErb+a*V7P=_r560n zAa9l!{DrOv`z57__i7N~W8sCz`fpeTuP<-JCB+n-8jTpC&V+oW#TBw7je9zJS$0qt zPZX29kOj0zaJxW0DwABNCZk_^T-=*4p4;?Xh)}t3rc$R$N=V+u0GJqr?s5B{)6mv@7cmR5di_9|9wz^IvCJ||E2-+ zYkdCqRqr^5hZLcr%+?)e+}t1O?CUPN)~{7B^8R&1lrT)k@yy)Q_Ry7M&LG!#4YzT` z*4>cCjPESA(`D5tr6EO)CU>Y5F}jz@#HA6T8Zdx`>KU_+)u`g0keF0KW=-qOA-$h9 zlaECPt@?c76q~o%GILZ@-f22+gLu5^+Jwngl7V`i@s!&%J_Gsm>(w;WTrHvMh4XY= z`Z8r+!@O_bRvB`Nka2bACJ}dG(C)_43t44kwu_B#H(HC4&T18K6WZ-sNTgqzikF-V z$tLWDvg}B~NU9d@ak_I5Xk*GtKEvm8A9As!45JY?=Vmxi)@!cjk;S?(_O8S zvtIAmIow{7kQvo54q8_! zdHLqdU;}d_G_CEeOlfvc2U{)XN4wAq=@j7Kcg;mrvn~(*6 zY5w!%iks*b9L<>W&KZ;88}dtMQXDYmcY?xb-c`QW>u{1&f)laZEVDYdMjFAjKscv! 
z2F5|Sa?kM}1h4pw)96p9UkyUl4iO<*6f>LUTfMl5>&{)Iw`SrB73R?8y^aCzVR92%G7zW8lv<4x@-& z1~EOj17oZ`+)q*Gb{hZKlvgZ3}p-94^5nC|!f0Jse+>@SYuW@8sMR zei2qSv_2c<)@dx!0N)42*IsdmMOL8o;y)r6PwDDn(A5)&EU(Ft#Bv^jdKzc^i^{LF zY89~&-~s^aXrQ8zqW^h94iTXrYf%pPI689m&+~HsdiTM^9P*48P_Q2?Scv8???$d` zkR~PmD%BqNbYK^G_zIXHJ6N!gDg*+5b66ce!uOkl3;8Pmx)<`$4lw0)uwWsHh|}~a ztjMmPVCDpJxg$@=hWD55< zn+MSUS33*2Wer^X4i+p#0smN|CmRj;*;nND>QQ_j6C7vuB;9djn?U0hxs3@lZVwhL zB$VhEn@6hMziHzlTLTz`+{rr{&Dc}FSUW6w%um!P{n;pTqwDDI-6B5D=mCYy|J>2~ zP0R3S>&P9bqr3M!$w_zbpTEdI>|P*OBX=*3vPOAVJAl0#UPoOXV>?^Y32#BZ$3vmUYKF;RJ zB-Nj-BCBObJpB0Xp2lf`ypU{bKV-p888w4%r|;BV?`SsFP0g zzZgVHRVUHCkZl4g5V8~js6Yn`7ShUajLk#39-|5UYztW Date: Wed, 12 Feb 2025 11:36:19 -0600 Subject: [PATCH 25/32] Update unitxt task.py to bring in line with recent repo changes (#2684) --- lm_eval/tasks/unitxt/task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lm_eval/tasks/unitxt/task.py b/lm_eval/tasks/unitxt/task.py index 78e5c106..7fcf0cc0 100644 --- a/lm_eval/tasks/unitxt/task.py +++ b/lm_eval/tasks/unitxt/task.py @@ -109,6 +109,7 @@ class Unitxt(ConfigurableTask): apply_chat_template: bool = False, fewshot_as_multiturn: bool = False, chat_template: Optional[Callable] = None, + gen_prefix: Optional[str] = None, ) -> str: source = self.doc_to_text(doc) if isinstance(source, list): @@ -134,6 +135,7 @@ class Unitxt(ConfigurableTask): part of the document for `doc`. 
""" kwargs.pop("apply_chat_template", False) # Not used by unitxt + kwargs.pop("chat_template", False) # Not used by unitxt return [ Instance( request_type="generate_until", -- GitLab From 96f5e58f6cfb48f5ae7071eb5f58fe0b1df40467 Mon Sep 17 00:00:00 2001 From: achervyakov <77295913+artemorloff@users.noreply.github.com> Date: Thu, 13 Feb 2025 01:43:09 +0500 Subject: [PATCH 26/32] change ensure_ascii to False for JsonChatStr (#2691) --- lm_eval/models/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py index c24cea95..b9e75736 100644 --- a/lm_eval/models/api_models.py +++ b/lm_eval/models/api_models.py @@ -265,7 +265,7 @@ class TemplateAPI(TemplateLM): ) else: # bit of a hack. We'll load back before sending to the API - return JsonChatStr(json.dumps(chat_history)) + return JsonChatStr(json.dumps(chat_history, ensure_ascii=False)) @cached_property def eot_token_id(self) -> Optional[int]: -- GitLab From c3c05b067d28ecd238d6cffef32ddcbfd085a99d Mon Sep 17 00:00:00 2001 From: "James A. 
Michaelov" <32554945+jmichaelov@users.noreply.github.com> Date: Thu, 13 Feb 2025 06:41:32 -0500 Subject: [PATCH 27/32] set aggregation and higher_is_better (instead of falling back on defaults) (#2692) --- lm_eval/tasks/blimp/_template_yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lm_eval/tasks/blimp/_template_yaml b/lm_eval/tasks/blimp/_template_yaml index f81e7938..650a160b 100644 --- a/lm_eval/tasks/blimp/_template_yaml +++ b/lm_eval/tasks/blimp/_template_yaml @@ -9,5 +9,7 @@ should_decontaminate: true doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}" metric_list: - metric: acc + aggregation: mean + higher_is_better: true metadata: version: 1.0 -- GitLab From ef6f524349a55019a1ed4dd8de978f5011178bcd Mon Sep 17 00:00:00 2001 From: Kiersten Stokes Date: Fri, 14 Feb 2025 04:49:26 -0600 Subject: [PATCH 28/32] Update remaining references to assistant_prefill to gen_prefix (#2683) --- docs/task_guide.md | 2 +- lm_eval/api/task.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/task_guide.md b/docs/task_guide.md index 4e055044..e1695c20 100644 --- a/docs/task_guide.md +++ b/docs/task_guide.md @@ -37,7 +37,7 @@ Prompting / in-context formatting options: - **doc_to_choice** (`Union[Callable, str]`, *optional*) — Jinja2 template, string, or function to process a sample into a list of possible string choices for `multiple_choice` tasks. Left undefined for `generate_until` tasks. - **fewshot_delimiter** (`str`, *optional*, defaults to "\n\n") — String to insert between few-shot examples. - **target_delimiter** (`str`, *optional*, defaults to `" "`) — String to insert between input and target output for the datapoint being tested. -- **assistant_prefill** (`str`, *optional*) — String to append after the <|assistant|> token. For example, if the task is to generate a question, the assistant_prefill could be "The answer is: " to prompt the model to generate an answer to the question. 
If not using a chat template then this string will be appended to the end of the prompt. +- **gen_prefix** (`str`, *optional*) — String to append after the <|assistant|> token. For example, if the task is to generate a question, the gen_prefix could be "The answer is: " to prompt the model to generate an answer to the question. If not using a chat template then this string will be appended to the end of the prompt. Runtime configuration options: - **num_fewshot** (`int`, *optional*, defaults to 0) — Number of few-shot examples before the input. diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py index a1cf5519..e64fb43e 100644 --- a/lm_eval/api/task.py +++ b/lm_eval/api/task.py @@ -1049,6 +1049,8 @@ class ConfigurableTask(Task): Whether to provide the fewshot examples as a multiturn conversation or a single user turn. :param chat_template: callable (from lm.apply_chat_template) that takes in a list[Dict] chat transcript and renders it into a string. + :param gen_prefix: + String to append after the <|assistant|> token. :returns: str The fewshot context. 
""" -- GitLab From 157d8c3ce92a5d2f1f4e1ff4e42c869e153956af Mon Sep 17 00:00:00 2001 From: Irina Proskurina <72871167+upunaprosk@users.noreply.github.com> Date: Fri, 14 Feb 2025 11:51:29 +0100 Subject: [PATCH 29/32] Update README.md (#2694) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1dc08d55..d72057e5 100644 --- a/README.md +++ b/README.md @@ -489,7 +489,8 @@ Extras dependencies can be installed via `pip install -e ".[NAME]"` | api | For using api models (Anthropic, OpenAI API) | | deepsparse | For running NM's DeepSparse models | | dev | For linting PRs and contributions | -| gptq | For loading models with GPTQ | +| gptq | For loading models with AutoGPTQ | +| gptqmodel | For loading models with GPTQModel | | hf_transfer | For speeding up HF Hub file downloads | | ifeval | For running the IFEval task | | ibm_watsonx_ai | For using IBM watsonx.ai model apis | -- GitLab From 5a5acc08bd19f71832b72cb677a2a90bb41ddfdf Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Fri, 14 Feb 2025 13:33:41 +0000 Subject: [PATCH 30/32] fix `construct_requests` kwargs (#2700) --- lm_eval/tasks/fda/task.py | 4 +++- lm_eval/tasks/squad_completion/task.py | 4 +++- lm_eval/tasks/squadv2/task.py | 4 +++- lm_eval/tasks/swde/task.py | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lm_eval/tasks/fda/task.py b/lm_eval/tasks/fda/task.py index 1b01c2bc..a8261841 100644 --- a/lm_eval/tasks/fda/task.py +++ b/lm_eval/tasks/fda/task.py @@ -33,7 +33,9 @@ class FDA(ConfigurableTask): def doc_to_target(self, doc): return doc["value"] - def construct_requests(self, doc, ctx, **kwargs): + def construct_requests( + self, doc, ctx, chat_template=None, apply_chat_template=False, **kwargs + ): """Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. 
diff --git a/lm_eval/tasks/squad_completion/task.py b/lm_eval/tasks/squad_completion/task.py index 7603a986..81464c5d 100644 --- a/lm_eval/tasks/squad_completion/task.py +++ b/lm_eval/tasks/squad_completion/task.py @@ -33,7 +33,9 @@ class SQUADCompletion(ConfigurableTask): def doc_to_target(self, doc): return doc["value"] - def construct_requests(self, doc, ctx, **kwargs): + def construct_requests( + self, doc, ctx, chat_template=None, apply_chat_template=False, **kwargs + ): """Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. diff --git a/lm_eval/tasks/squadv2/task.py b/lm_eval/tasks/squadv2/task.py index 5a77cb5f..48743f75 100644 --- a/lm_eval/tasks/squadv2/task.py +++ b/lm_eval/tasks/squadv2/task.py @@ -105,7 +105,9 @@ class SQuAD2(ConfigurableTask): answer = "unanswerable" return " " + answer - def construct_requests(self, doc, ctx, **kwargs): + def construct_requests( + self, doc, ctx, chat_template=None, apply_chat_template=False, **kwargs + ): """Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. diff --git a/lm_eval/tasks/swde/task.py b/lm_eval/tasks/swde/task.py index 7226364b..62dc1f2e 100644 --- a/lm_eval/tasks/swde/task.py +++ b/lm_eval/tasks/swde/task.py @@ -33,7 +33,9 @@ class SWDE(ConfigurableTask): def doc_to_target(self, doc): return doc["value"] - def construct_requests(self, doc, ctx, **kwargs): + def construct_requests( + self, doc, ctx, chat_template=None, apply_chat_template=False, **kwargs + ): """Uses RequestFactory to construct Requests and returns an iterable of Requests which will be sent to the LM. 
-- GitLab From 41b952f30c39abd8d2802a429119fb50c63b6a04 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Fri, 14 Feb 2025 15:38:08 +0000 Subject: [PATCH 31/32] `arithmetic`: set target delimiter to empty string (#2701) * set target delimiter to empty string * nit * add warning --- lm_eval/api/samplers.py | 11 +++++++++++ lm_eval/tasks/arithmetic/README.md | 3 +++ lm_eval/tasks/arithmetic/arithmetic_1dc.yaml | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lm_eval/api/samplers.py b/lm_eval/api/samplers.py index 3f81dfc6..5d1791bd 100644 --- a/lm_eval/api/samplers.py +++ b/lm_eval/api/samplers.py @@ -1,3 +1,5 @@ +import logging +import warnings from functools import partial from typing import TYPE_CHECKING, Iterable, Optional, Union @@ -9,6 +11,8 @@ if TYPE_CHECKING: from lm_eval.api.task import ConfigurableTask, Task +eval_logger = logging.getLogger("lm-eval") + class ContextSampler: def __init__( @@ -97,6 +101,13 @@ class ContextSampler: labeled_examples += self.doc_to_choice(doc)[doc_content] if doc_target != "": + if self.target_delimiter.isspace() and str(doc_target)[0].isspace(): + # TODO: add logger warn once here. + warnings.warn( + "Both target_delimiter and target start with a space. This may cause issues.", + Warning, + stacklevel=2, + ) labeled_examples += self.target_delimiter labeled_examples += prefix labeled_examples += ( diff --git a/lm_eval/tasks/arithmetic/README.md b/lm_eval/tasks/arithmetic/README.md index 7bfc27cb..e3d8ec5e 100644 --- a/lm_eval/tasks/arithmetic/README.md +++ b/lm_eval/tasks/arithmetic/README.md @@ -58,3 +58,6 @@ If other tasks on this dataset are already supported: * [ ] Is the "Main" variant of this task clearly denoted? * [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [ ] Have you noted which, if any, published evaluation setups are matched by this variant? 
+ +### Changelog +version 2.0: (2025-Feb-14) set target delimiter to "" as the targets already start with a space. diff --git a/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml b/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml index 2e9c9277..0e3bc40b 100644 --- a/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml +++ b/lm_eval/tasks/arithmetic/arithmetic_1dc.yaml @@ -8,11 +8,12 @@ validation_split: validation test_split: null doc_to_text: "{{context}}" doc_to_target: "{{completion}}" +target_delimiter: "" metric_list: - metric: acc aggregation: mean higher_is_better: true metadata: - version: 1.0 + version: 2.0 dataset_kwargs: trust_remote_code: true -- GitLab From 52df63b7b30da53c481ed9090598d9189fab1d91 Mon Sep 17 00:00:00 2001 From: Baber Abbasi <92168766+baberabb@users.noreply.github.com> Date: Mon, 17 Feb 2025 23:51:40 +0000 Subject: [PATCH 32/32] fix vllm (#2708) * fix vllm * fix data_parallel * copy to multimodal --- lm_eval/models/vllm_causallms.py | 6 ++---- lm_eval/models/vllm_vlms.py | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lm_eval/models/vllm_causallms.py b/lm_eval/models/vllm_causallms.py index 9b86c319..3aa80469 100644 --- a/lm_eval/models/vllm_causallms.py +++ b/lm_eval/models/vllm_causallms.py @@ -109,7 +109,7 @@ class VLLM(TemplateLM): eval_logger.warning( "You might experience occasional issues with model weight downloading when data_parallel is in use. To ensure stable performance, run with data_parallel_size=1 until the weights are downloaded and cached." 
) - self.model_args["worker_use_ray"] = True + self.model_args["distributed_executor_backend"] = "ray" self.batch_size = "auto" eval_logger.info("Manual batching is not compatible with data parallelism.") @@ -246,9 +246,7 @@ class VLLM(TemplateLM): # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote # also seems to only work with decorator and not with ray.remote() fn # see https://github.com/vllm-project/vllm/issues/973 - # note: this has changed on 0.3.3, and it only works now if num_gpus are set. - # but then tensor_parallel breaks - @ray.remote + @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None) def run_inference_one_model( model_args: dict, sampling_params, diff --git a/lm_eval/models/vllm_vlms.py b/lm_eval/models/vllm_vlms.py index a0d72926..c87f0f70 100644 --- a/lm_eval/models/vllm_vlms.py +++ b/lm_eval/models/vllm_vlms.py @@ -109,9 +109,7 @@ class VLLM_VLM(VLLM): # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote # also seems to only work with decorator and not with ray.remote() fn # see https://github.com/vllm-project/vllm/issues/973 - # note: this has changed on 0.3.3, and it only works now if num_gpus are set. - # but then tensor_parallel breaks - @ray.remote + @ray.remote(num_gpus=1 if self.tensor_parallel_size == 1 else None) def run_inference_one_model( model_args: dict, sampling_params, requests: List[List[dict]] ): -- GitLab